diff --git a/libraries/bzip2/CHANGES b/libraries/bzip2/CHANGES
new file mode 100644
index 000000000..30afead25
--- /dev/null
+++ b/libraries/bzip2/CHANGES
@@ -0,0 +1,356 @@
+ ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.8 of 13 July 2019
+ Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the 
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------
+
+
+0.9.0
+~~~~~
+First version.
+
+
+0.9.0a
+~~~~~~
+Removed 'ranlib' from Makefile, since most modern Unix-es 
+don't need it, or even know about it.
+
+
+0.9.0b
+~~~~~~
+Fixed a problem with error reporting in bzip2.c.  This does not affect
+the library in any way.  Problem is: versions 0.9.0 and 0.9.0a (of the
+program proper) compress and decompress correctly, but give misleading
+error messages (internal panics) when an I/O error occurs, instead of
+reporting the problem correctly.  This shouldn't give any data loss
+(as far as I can see), but is confusing.
+
+Made the inline declarations disappear for non-GCC compilers.
+
+
+0.9.0c
+~~~~~~
+Fixed some problems in the library pertaining to some boundary cases.
+This makes the library behave more correctly in those situations.  The
+fixes apply only to features (calls and parameters) not used by
+bzip2.c, so the non-fixedness of them in previous versions has no
+effect on reliability of bzip2.c.
+
+In bzlib.c:
+   * made zero-length BZ_FLUSH work correctly in bzCompress().
+   * fixed bzWrite/bzRead to ignore zero-length requests.
+   * fixed bzread to correctly handle read requests after EOF.
+   * wrong parameter order in call to bzDecompressInit in
+     bzBuffToBuffDecompress.  Fixed.
+
+In compress.c:
+   * changed setting of nGroups in sendMTFValues() so as to 
+     do a bit better on small files.  This _does_ affect
+     bzip2.c.
+
+
+0.9.5a
+~~~~~~
+Major change: add a fallback sorting algorithm (blocksort.c)
+to give reasonable behaviour even for very repetitive inputs.
+Nuked --repetitive-best and --repetitive-fast since they are
+no longer useful.
+
+Minor changes: mostly a whole bunch of small changes/
+bugfixes in the driver (bzip2.c).  Changes pertaining to the
+user interface are:
+
+   allow decompression of symlink'd files to stdout
+   decompress/test files even without .bz2 extension
+   give more accurate error messages for I/O errors
+   when compressing/decompressing to stdout, don't catch control-C
+   read flags from BZIP2 and BZIP environment variables
+   decline to break hard links to a file unless forced with -f
+   allow -c flag even with no filenames
+   preserve file ownerships as far as possible
+   make -s -1 give the expected block size (100k)
+   add a flag -q --quiet to suppress nonessential warnings
+   stop decoding flags after --, so files beginning in - can be handled
+   resolved inconsistent naming: bzcat or bz2cat ?
+   bzip2 --help now returns 0
+
+Programming-level changes are:
+
+   fixed syntax error in GET_LL4 for Borland C++ 5.02
+   let bzBuffToBuffDecompress return BZ_DATA_ERROR{_MAGIC}
+   fix overshoot of mode-string end in bzopen_or_bzdopen
+   wrapped bzlib.h in #ifdef __cplusplus ... extern "C" { ... }
+   close file handles under all error conditions
+   added minor mods so it compiles with DJGPP out of the box
+   fixed Makefile so it doesn't give problems with BSD make
+   fix uninitialised memory reads in dlltest.c
+
+0.9.5b
+~~~~~~
+Open stdin/stdout in binary mode for DJGPP.
+
+0.9.5c
+~~~~~~
+Changed BZ_N_OVERSHOOT to be ... + 2 instead of ... + 1.  The + 1
+version could cause the sorted order to be wrong in some extremely
+obscure cases.  Also changed setting of quadrant in blocksort.c.
+
+0.9.5d
+~~~~~~
+The only functional change is to make bzlibVersion() in the library
+return the correct string.  This has no effect whatsoever on the
+functioning of the bzip2 program or library.  Added a couple of casts
+so the library compiles without warnings at level 3 in MS Visual
+Studio 6.0.  Included a Y2K statement in the file Y2K_INFO.  All other
+changes are minor documentation changes.
+
+1.0
+~~~
+Several minor bugfixes and enhancements:
+
+* Large file support.  The library uses 64-bit counters to
+  count the volume of data passing through it.  bzip2.c 
+  is now compiled with -D_FILE_OFFSET_BITS=64 to get large
+  file support from the C library.  -v correctly prints out
+  file sizes greater than 4 gigabytes.  All these changes have
+  been made without assuming a 64-bit platform or a C compiler
+  which supports 64-bit ints, so, except for the C library
+  aspect, they are fully portable.
+
+* Decompression robustness.  The library/program should be
+  robust to any corruption of compressed data, detecting and
+  handling _all_ corruption, instead of merely relying on
+  the CRCs.  What this means is that the program should 
+  never crash, given corrupted data, and the library should
+  always return BZ_DATA_ERROR.
+
+* Fixed an obscure race-condition bug only ever observed on
+  Solaris, in which, if you were very unlucky and issued
+  control-C at exactly the wrong time, both input and output
+  files would be deleted.
+
+* Don't run out of file handles on test/decompression when
+  large numbers of files have invalid magic numbers.
+
+* Avoid library namespace pollution.  Prefix all exported 
+  symbols with BZ2_.
+
+* Minor sorting enhancements from my DCC2000 paper.
+
+* Advance the version number to 1.0, so as to counteract the
+  (false-in-this-case) impression some people have that programs 
+  with version numbers less than 1.0 are in some way, experimental,
+  pre-release versions.
+
+* Create an initial Makefile-libbz2_so to build a shared library.
+  Yes, I know I should really use libtool et al ...
+
+* Make the program exit with 2 instead of 0 when decompression
+  fails due to a bad magic number (ie, an invalid bzip2 header).
+  Also exit with 1 (as the manual claims :-) whenever a diagnostic
+  message would have been printed AND the corresponding operation 
+  is aborted, for example
+     bzip2: Output file xx already exists.
+  When a diagnostic message is printed but the operation is not 
+  aborted, for example
+     bzip2: Can't guess original name for wurble -- using wurble.out
+  then the exit value 0 is returned, unless some other problem is
+  also detected.
+
+  I think it corresponds more closely to what the manual claims now.
+
+
+1.0.1
+~~~~~
+* Modified dlltest.c so it uses the new BZ2_ naming scheme.
+* Modified makefile-msc to fix minor build probs on Win2k.
+* Updated README.COMPILATION.PROBLEMS.
+
+There are no functionality changes or bug fixes relative to version
+1.0.0.  This is just a documentation update + a fix for minor Win32
+build problems.  For almost everyone, upgrading from 1.0.0 to 1.0.1 is
+utterly pointless.  Don't bother.
+
+
+1.0.2
+~~~~~
+A bug fix release, addressing various minor issues which have appeared
+in the 18 or so months since 1.0.1 was released.  Most of the fixes
+are to do with file-handling or documentation bugs.  To the best of my
+knowledge, there have been no data-loss-causing bugs reported in the
+compression/decompression engine of 1.0.0 or 1.0.1.
+
+Note that this release does not improve the rather crude build system
+for Unix platforms.  The general plan here is to autoconfiscate/
+libtoolise 1.0.2 soon after release, and release the result as 1.1.0
+or perhaps 1.2.0.  That, however, is still just a plan at this point.
+
+Here are the changes in 1.0.2.  Bug-reporters and/or patch-senders in
+parentheses.
+
+* Fix an infinite segfault loop in 1.0.1 when a directory is
+  encountered in -f (force) mode.
+     (Trond Eivind Glomsrod, Nicholas Nethercote, Volker Schmidt)
+
+* Avoid double fclose() of output file on certain I/O error paths.
+     (Solar Designer)
+
+* Don't fail with internal error 1007 when fed a long stream (> 48MB)
+  of byte 251.  Also print useful message suggesting that 1007s may be
+  caused by bad memory.
+     (noticed by Juan Pedro Vallejo, fixed by me)
+
+* Fix uninitialised variable silly bug in demo prog dlltest.c.
+     (Jorj Bauer)
+
+* Remove 512-MB limitation on recovered file size for bzip2recover
+  on selected platforms which support 64-bit ints.  At the moment
+  all GCC supported platforms, and Win32.
+     (me, Alson van der Meulen)
+
+* Hard-code header byte values, to give correct operation on platforms
+  using EBCDIC as their native character set (IBM's OS/390).
+     (Leland Lucius)
+
+* Copy file access times correctly.
+     (Marty Leisner)
+
+* Add distclean and check targets to Makefile.
+     (Michael Carmack)
+
+* Parameterise use of ar and ranlib in Makefile.  Also add $(LDFLAGS).
+     (Rich Ireland, Bo Thorsen)
+
+* Pass -p (create parent dirs as needed) to mkdir during make install.
+     (Jeremy Fusco)
+
+* Dereference symlinks when copying file permissions in -f mode.
+     (Volker Schmidt)
+
+* Majorly simplify implementation of uInt64_qrm10.
+     (Bo Lindbergh)
+
+* Check the input file still exists before deleting the output one,
+  when aborting in cleanUpAndFail().
+     (Joerg Prante, Robert Linden, Matthias Krings)
+
+Also a bunch of patches courtesy of Philippe Troin, the Debian maintainer
+of bzip2:
+
+* Wrapper scripts (with manpages): bzdiff, bzgrep, bzmore.
+
+* Spelling changes and minor enhancements in bzip2.1.
+
+* Avoid race condition between creating the output file and setting its
+  interim permissions safely, by using fopen_output_safely().
+  No changes to bzip2recover since there is no issue with file
+  permissions there.
+
+* do not print senseless report with -v when compressing an empty
+  file.
+
+* bzcat -f works on non-bzip2 files.
+
+* do not try to escape shell meta-characters on unix (the shell takes
+  care of these).
+
+* added --fast and --best aliases for -1 -9 for gzip compatibility.
+
+
+1.0.3 (15 Feb 05)
+~~~~~~~~~~~~~~~~~
+Fixes some minor bugs since the last version, 1.0.2.
+
+* Further robustification against corrupted compressed data.
+  There are currently no known bitstreams which can cause the
+  decompressor to crash, loop or access memory which does not
+  belong to it.  If you are using bzip2 or the library to 
+  decompress bitstreams from untrusted sources, an upgrade
+  to 1.0.3 is recommended.  This fixes CAN-2005-1260.
+
+* The documentation has been converted to XML, from which html
+  and pdf can be derived.
+
+* Various minor bugs in the documentation have been fixed.
+
+* Fixes for various compilation warnings with newer versions of
+  gcc, and on 64-bit platforms.
+
+* The BZ_NO_STDIO cpp symbol was not properly observed in 1.0.2.
+  This has been fixed.
+
+
+1.0.4 (20 Dec 06)
+~~~~~~~~~~~~~~~~~
+Fixes some minor bugs since the last version, 1.0.3.
+
+* Fix file permissions race problem (CAN-2005-0953).
+
+* Avoid possible segfault in BZ2_bzclose.  From Coverity's NetBSD
+  scan.
+
+* 'const'/prototype cleanups in the C code.
+
+* Change default install location to /usr/local, and handle multiple
+  'make install's without error.
+
+* Sanitise file names more carefully in bzgrep.  Fixes CAN-2005-0758
+  to the extent that applies to bzgrep.
+
+* Use 'mktemp' rather than 'tempfile' in bzdiff.
+
+* Tighten up a couple of assertions in blocksort.c following automated
+  analysis.
+
+* Fix minor doc/comment bugs.
+
+
+1.0.5 (10 Dec 07)
+~~~~~~~~~~~~~~~~~
+Security fix only.  Fixes CERT-FI 20469 as it applies to bzip2.
+
+
+1.0.6 (6 Sept 10)
+~~~~~~~~~~~~~~~~~
+
+* Security fix for CVE-2010-0405.  This was reported by Mikolaj
+  Izdebski.
+
+* Make the documentation build on Ubuntu 10.04
+
+1.0.7 (27 Jun 19)
+~~~~~~~~~~~~~~~~~
+
+* Fix undefined behavior in the macros SET_BH, CLEAR_BH, & ISSET_BH
+
+* bzip2: Fix return value when combining --test,-t and -q.
+
+* bzip2recover: Fix buffer overflow for large argv[0]
+
+* bzip2recover: Fix use after free issue with outFile (CVE-2016-3189)
+
+* Make sure nSelectors is not out of range (CVE-2019-12900)
+
+1.0.8 (13 Jul 19)
+~~~~~~~~~~~~~~~~~
+
+* Accept as many selectors as the file format allows.
+  This relaxes the fix for CVE-2019-12900 from 1.0.7
+  so that bzip2 allows decompression of bz2 files that
+  use (too) many selectors again.
+
+* Fix handling of large (> 4GB) files on Windows.
+
+* Cleanup of bzdiff and bzgrep scripts so they don't use
+  any bash extensions and handle multiple archives correctly.
+
+* There is now a bz2-files testsuite at
+  https://sourceware.org/git/bzip2-tests.git
diff --git a/libraries/bzip2/CMakeLists.txt b/libraries/bzip2/CMakeLists.txt
new file mode 100644
index 000000000..7fefb2bf2
--- /dev/null
+++ b/libraries/bzip2/CMakeLists.txt
@@ -0,0 +1,18 @@
+cmake_minimum_required( VERSION 2.8.7 )
+# NOTE(review): make_release_only() is not defined in this file; presumably a parent-project macro that pins release settings - confirm.
+make_release_only()
+# Extra warnings plus -fomit-frame-pointer, applied only when the parent project flags the C compiler as GNU-compatible.
+if( ZD_CMAKE_COMPILER_IS_GNUC_COMPATIBLE )
+	set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -fomit-frame-pointer" )
+endif()
+# Define BZ_NO_STDIO for these sources (presumably compiles out the stdio-based API - confirm in bzlib.h), then build static library "bz2".
+add_definitions( -DBZ_NO_STDIO )
+add_library( bz2 STATIC
+    blocksort.c
+    bzlib.c
+    compress.c
+    crctable.c
+    decompress.c
+    huffman.c
+    randtable.c )
+target_link_libraries( bz2 )
diff --git a/libraries/bzip2/LICENSE b/libraries/bzip2/LICENSE
new file mode 100644
index 000000000..81a37eab7
--- /dev/null
+++ b/libraries/bzip2/LICENSE
@@ -0,0 +1,42 @@
+
+--------------------------------------------------------------------------
+
+This program, "bzip2", the associated library "libbzip2", and all
+documentation, are copyright (C) 1996-2019 Julian R Seward.  All
+rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. The origin of this software must not be misrepresented; you must 
+   not claim that you wrote the original software.  If you use this 
+   software in a product, an acknowledgment in the product 
+   documentation would be appreciated but is not required.
+
+3. Altered source versions must be plainly marked as such, and must
+   not be misrepresented as being the original software.
+
+4. The name of the author may not be used to endorse or promote 
+   products derived from this software without specific prior written 
+   permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Julian Seward, jseward@acm.org
+bzip2/libbzip2 version 1.0.8 of 13 July 2019
+
+--------------------------------------------------------------------------
diff --git a/libraries/bzip2/README b/libraries/bzip2/README
new file mode 100644
index 000000000..b9c6099fd
--- /dev/null
+++ b/libraries/bzip2/README
@@ -0,0 +1,196 @@
+
+This is the README for bzip2/libbzip2.
+This version is fully compatible with the previous public releases.
+
+------------------------------------------------------------------
+This file is part of bzip2/libbzip2, a program and library for
+lossless, block-sorting data compression.
+
+bzip2/libbzip2 version 1.0.8 of 13 July 2019
+Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
+
+Please read the WARNING, DISCLAIMER and PATENTS sections in this file.
+
+This program is released under the terms of the license contained
+in the file LICENSE.
+------------------------------------------------------------------
+
+Complete documentation is available in Postscript form (manual.ps),
+PDF (manual.pdf) or html (manual.html).  A plain-text version of the
+manual page is available as bzip2.txt.
+
+
+HOW TO BUILD -- UNIX
+
+Type 'make'.  This builds the library libbz2.a and then the programs
+bzip2 and bzip2recover.  Six self-tests are run.  If the self-tests
+complete ok, carry on to installation:
+
+To install in /usr/local/bin, /usr/local/lib, /usr/local/man and
+/usr/local/include, type
+
+   make install
+
+To install somewhere else, eg, /xxx/yyy/{bin,lib,man,include}, type
+
+   make install PREFIX=/xxx/yyy
+
+If you are (justifiably) paranoid and want to see what 'make install'
+is going to do, you can first do
+
+   make -n install                      or
+   make -n install PREFIX=/xxx/yyy      respectively.
+
+The -n instructs make to show the commands it would execute, but not
+actually execute them.
+
+
+HOW TO BUILD -- UNIX, shared library libbz2.so.
+
+Do 'make -f Makefile-libbz2_so'.  This Makefile seems to work for
+Linux-ELF (RedHat 7.2 on an x86 box), with gcc.  I make no claims
+that it works for any other platform, though I suspect it probably
+will work for most platforms employing both ELF and gcc.
+
+bzip2-shared, a client of the shared library, is also built, but not
+self-tested.  So I suggest you also build using the normal Makefile,
+since that conducts a self-test.  A second reason to prefer the
+version statically linked to the library is that, on x86 platforms,
+building shared objects makes a valuable register (%ebx) unavailable
+to gcc, resulting in a slowdown of 10%-20%, at least for bzip2.
+
+Important note for people upgrading .so's from 0.9.0/0.9.5 to version
+1.0.X.  All the functions in the library have been renamed, from (eg)
+bzCompress to BZ2_bzCompress, to avoid namespace pollution.
+Unfortunately this means that the libbz2.so created by
+Makefile-libbz2_so will not work with any program which used an older
+version of the library.  I do encourage library clients to make the
+effort to upgrade to use version 1.0, since it is both faster and more
+robust than previous versions.
+
+
+HOW TO BUILD -- Windows 95, NT, DOS, Mac, etc.
+
+It's difficult for me to support compilation on all these platforms.
+My approach is to collect binaries for these platforms, and put them
+on the master web site (https://sourceware.org/bzip2/).  Look there.  However
+(FWIW), bzip2-1.0.X is very standard ANSI C and should compile
+unmodified with MS Visual C.  If you have difficulties building, you
+might want to read README.COMPILATION.PROBLEMS.
+
+At least using MS Visual C++ 6, you can build from the unmodified
+sources by issuing, in a command shell: 
+
+   nmake -f makefile.msc
+
+(you may need to first run the MSVC-provided script VCVARS32.BAT
+ so as to set up paths to the MSVC tools correctly).
+
+
+VALIDATION
+
+Correct operation, in the sense that a compressed file can always be
+decompressed to reproduce the original, is obviously of paramount
+importance.  To validate bzip2, I used a modified version of Mark
+Nelson's churn program.  Churn is an automated test driver which
+recursively traverses a directory structure, using bzip2 to compress
+and then decompress each file it encounters, and checking that the
+decompressed data is the same as the original.
+
+
+
+Please read and be aware of the following:
+
+WARNING:
+
+   This program and library (attempts to) compress data by 
+   performing several non-trivial transformations on it.  
+   Unless you are 100% familiar with *all* the algorithms 
+   contained herein, and with the consequences of modifying them, 
+   you should NOT meddle with the compression or decompression 
+   machinery.  Incorrect changes can and very likely *will* 
+   lead to disastrous loss of data.
+
+
+DISCLAIMER:
+
+   I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE
+   USE OF THIS PROGRAM/LIBRARY, HOWSOEVER CAUSED.
+
+   Every compression of a file implies an assumption that the
+   compressed file can be decompressed to reproduce the original.
+   Great efforts in design, coding and testing have been made to
+   ensure that this program works correctly.  However, the complexity
+   of the algorithms, and, in particular, the presence of various
+   special cases in the code which occur with very low but non-zero
+   probability make it impossible to rule out the possibility of bugs
+   remaining in the program.  DO NOT COMPRESS ANY DATA WITH THIS
+   PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE POSSIBILITY, HOWEVER
+   SMALL, THAT THE DATA WILL NOT BE RECOVERABLE.
+
+   That is not to say this program is inherently unreliable.  
+   Indeed, I very much hope the opposite is true.  bzip2/libbzip2 
+   has been carefully constructed and extensively tested.
+
+
+PATENTS:
+
+   To the best of my knowledge, bzip2/libbzip2 does not use any 
+   patented algorithms.  However, I do not have the resources 
+   to carry out a patent search.  Therefore I cannot give any 
+   guarantee of the above statement.
+
+
+
+WHAT'S NEW IN 0.9.0 (as compared to 0.1pl2) ?
+
+   * Approx 10% faster compression, 30% faster decompression
+   * -t (test mode) is a lot quicker
+   * Can decompress concatenated compressed files
+   * Programming interface, so programs can directly read/write .bz2 files
+   * Less restrictive (BSD-style) licensing
+   * Flag handling more compatible with GNU gzip
+   * Much more documentation, i.e., a proper user manual
+   * Hopefully, improved portability (at least of the library)
+
+WHAT'S NEW IN 0.9.5 ?
+
+   * Compression speed is much less sensitive to the input
+     data than in previous versions.  Specifically, the very
+     slow performance caused by repetitive data is fixed.
+   * Many small improvements in file and flag handling.
+   * A Y2K statement.
+
+WHAT'S NEW IN 1.0.x ?
+
+   See the CHANGES file.
+
+I hope you find bzip2 useful.  Feel free to contact the developers at
+   bzip2-devel@sourceware.org
+if you have any suggestions or queries.  Many people mailed me with
+comments, suggestions and patches after the releases of bzip-0.15,
+bzip-0.21, and bzip2 versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1,
+1.0.2 and 1.0.3, and the changes in bzip2 are largely a result of this
+feedback.  I thank you for your comments.
+
+bzip2's "home" is https://sourceware.org/bzip2/
+
+Julian Seward
+jseward@acm.org
+Cambridge, UK.
+
+18     July 1996 (version 0.15)
+25   August 1996 (version 0.21)
+ 7   August 1997 (bzip2, version 0.1)
+29   August 1997 (bzip2, version 0.1pl2)
+23   August 1998 (bzip2, version 0.9.0)
+ 8     June 1999 (bzip2, version 0.9.5)
+ 4     Sept 1999 (bzip2, version 0.9.5d)
+ 5      May 2000 (bzip2, version 1.0pre8)
+30 December 2001 (bzip2, version 1.0.2pre1)
+15 February 2005 (bzip2, version 1.0.3)
+20 December 2006 (bzip2, version 1.0.4)
+10 December 2007 (bzip2, version 1.0.5)
+ 6     Sept 2010 (bzip2, version 1.0.6)
+27     June 2019 (bzip2, version 1.0.7)
+13     July 2019 (bzip2, version 1.0.8)
diff --git a/libraries/bzip2/blocksort.c b/libraries/bzip2/blocksort.c
new file mode 100644
index 000000000..92d81fe28
--- /dev/null
+++ b/libraries/bzip2/blocksort.c
@@ -0,0 +1,1094 @@
+
+/*-------------------------------------------------------------*/
+/*--- Block sorting machinery                               ---*/
+/*---                                           blocksort.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+   This file is part of bzip2/libbzip2, a program and library for
+   lossless, block-sorting data compression.
+
+   bzip2/libbzip2 version 1.0.8 of 13 July 2019
+   Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
+
+   Please read the WARNING, DISCLAIMER and PATENTS sections in the 
+   README file.
+
+   This program is released under the terms of the license contained
+   in the file LICENSE.
+   ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+/*---------------------------------------------*/
+/*--- Fallback O(N log(N)^2) sorting        ---*/
+/*--- algorithm, for repetitive blocks      ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
+static 
+__inline__
+void fallbackSimpleSort ( UInt32* fmap, 
+                          UInt32* eclass, 
+                          Int32   lo, 
+                          Int32   hi )
+{
+   /* Sorts fmap[lo..hi] in place by ascending eclass[fmap[.]]: one  */
+   /* stride-4 insertion pass (ranges of 5+ entries), then stride 1. */
+   Int32  pos, scan, held, stride;
+   UInt32 heldKey;
+
+   if (lo == hi) return;
+
+   for ( stride = 4; stride >= 1; stride -= 3 ) {
+      /*-- the coarse pass is skipped for short ranges --*/
+      if (stride == 4 && hi - lo <= 3) continue;
+      for ( pos = hi-stride; pos >= lo; pos-- ) {
+         held    = fmap[pos];
+         heldKey = eclass[held];
+         scan    = pos + stride;
+         /*-- shift smaller-keyed entries down by one stride --*/
+         while (scan <= hi && heldKey > eclass[fmap[scan]]) {
+            fmap[scan-stride] = fmap[scan];
+            scan += stride;
+         }
+         fmap[scan-stride] = held;
+      }
+   }
+}
+
+
+/*---------------------------------------------*/
+#define fswap(zz1, zz2) /*-- exchange two Int32 lvalues --*/ \
+   { Int32 zztmp = (zz1); (zz1) = (zz2); (zz2) = zztmp; }
+/*-- fvswap: exchange the zzn-entry runs of fmap[] starting at zzp1 and zzp2 --*/
+#define fvswap(zzp1, zzp2, zzn)       \
+{                                     \
+   Int32 yyp1 = (zzp1);               \
+   Int32 yyp2 = (zzp2);               \
+   Int32 yyn  = (zzn);                \
+   while (yyn > 0) {                  \
+      fswap(fmap[yyp1], fmap[yyp2]);  \
+      yyp1++; yyp2++; yyn--;          \
+   }                                  \
+}
+/*-- fmin: smaller of a and b.  The whole expansion is parenthesised so it --*/
+/*-- stays correct inside larger expressions; a and b may be evaluated twice --*/
+#define fmin(a,b) (((a) < (b)) ? (a) : (b))
+/*-- fpush/fpop: push/pop one (lo,hi) pair on the caller's stackLo/stackHi/sp; no bounds check here --*/
+#define fpush(lz,hz) { stackLo[sp] = (lz); \
+                       stackHi[sp] = (hz); \
+                       sp++; }
+
+#define fpop(lz,hz) { sp--;                \
+                      (lz) = stackLo[sp];  \
+                      (hz) = stackHi[sp]; }
+/*-- tuning for fallbackQSort3: small-range cutoff and explicit-stack capacity --*/
+#define FALLBACK_QSORT_SMALL_THRESH 10
+#define FALLBACK_QSORT_STACK_SIZE   100
+
+
+static   /*-- three-way quicksort of fmap[loSt..hiSt], ascending by eclass[fmap[.]] --*/
+void fallbackQSort3 ( UInt32* fmap,     /* permutation; entries in the range are reordered */
+                      UInt32* eclass,   /* sort keys, always read as eclass[fmap[i]]       */
+                      Int32   loSt,     /* first index of the range to sort (inclusive)    */
+                      Int32   hiSt )    /* last index of the range to sort (inclusive)     */
+{
+   Int32 unLo, unHi, ltLo, gtHi, n, m;
+   Int32 sp, lo, hi;
+   UInt32 med, r, r3;
+   Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
+   Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
+   /*-- r is the state of the pseudo-random pivot chooser used below --*/
+   r = 0;
+   /*-- pending (lo,hi) ranges are kept on an explicit stack instead of recursing --*/
+   sp = 0;
+   fpush ( loSt, hiSt );
+
+   while (sp > 0) {
+      /*-- each pass pops one range and pushes at most two --*/
+      AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
+
+      fpop ( lo, hi );
+      if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
+         fallbackSimpleSort ( fmap, eclass, lo, hi );
+         continue;
+      }
+
+      /* Random partitioning.  Median of 3 sometimes fails to
+         avoid bad cases.  Median of 9 seems to help but 
+         looks rather expensive.  This too seems to work but
+         is cheaper.  Guidance for the magic constants 
+         7621 and 32768 is taken from Sedgewick's algorithms
+         book, chapter 35.
+      */
+      r = ((r * 7621) + 1) % 32768;
+      r3 = r % 3;
+      if (r3 == 0) med = eclass[fmap[lo]]; else
+      if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
+                   med = eclass[fmap[hi]];
+      /*-- [lo,ltLo) and (gtHi,hi] collect keys equal to med; [unLo,unHi] is still unscanned --*/
+      unLo = ltLo = lo;
+      unHi = gtHi = hi;
+
+      while (1) {
+         while (1) {
+            if (unLo > unHi) break;
+            n = (Int32)eclass[fmap[unLo]] - (Int32)med;
+            if (n == 0) { 
+               fswap(fmap[unLo], fmap[ltLo]); 
+               ltLo++; unLo++; 
+               continue; 
+            };
+            if (n > 0) break;
+            unLo++;
+         }
+         while (1) {
+            if (unLo > unHi) break;
+            n = (Int32)eclass[fmap[unHi]] - (Int32)med;
+            if (n == 0) { 
+               fswap(fmap[unHi], fmap[gtHi]); 
+               gtHi--; unHi--; 
+               continue; 
+            };
+            if (n < 0) break;
+            unHi--;
+         }
+         if (unLo > unHi) break;
+         fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
+      }
+
+      AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
+      /*-- every key in the range equalled med: nothing left to sort here --*/
+      if (gtHi < ltLo) continue;
+      /*-- move the equal-key runs parked at both ends into the middle --*/
+      n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
+      m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
+      /*-- n = last index of the less-than part, m = first index of the greater-than part --*/
+      n = lo + unLo - ltLo - 1;
+      m = hi - (gtHi - unHi) + 1;
+      /*-- push the larger part first, so the smaller part is popped and handled next --*/
+      if (n - lo > hi - m) {
+         fpush ( lo, n );
+         fpush ( m, hi );
+      } else {
+         fpush ( m, hi );
+         fpush ( lo, n );
+      }
+   }
+}
+
+#undef fmin     /*-- the quicksort helper macros and tuning constants --*/
+#undef fpush    /*-- defined above are retired here, so later code in --*/
+#undef fpop     /*-- this file cannot pick them up by accident        --*/
+#undef fswap
+#undef fvswap
+#undef FALLBACK_QSORT_SMALL_THRESH
+#undef FALLBACK_QSORT_STACK_SIZE
+
+
+/*---------------------------------------------*/
+/* Pre:
+      nblock > 0
+      eclass exists for [0 .. nblock-1]
+      ((UChar*)eclass) [0 .. nblock-1] holds block
+      fmap exists for [0 .. nblock-1]
+
+   Post:
+      ((UChar*)eclass) [0 .. nblock-1] holds block
+      All other areas of eclass destroyed
+      fmap [0 .. nblock-1] holds sorted order
+      bhtab [ 0 .. 2+(nblock/32) ] destroyed
+*/
+
+#define       SET_BH(zz)  bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31))    /* set bit zz of the bit vector bhtab (32 bits per word) */
+#define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31))   /* clear bit zz */
+#define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31)))   /* nonzero iff bit zz is set */
+#define      WORD_BH(zz)  bhtab[(zz) >> 5]                                  /* the whole 32-bit word that holds bit zz */
+#define UNALIGNED_BH(zz)  ((zz) & 0x01f)                                    /* nonzero iff zz is not a multiple of 32 */
+
+static   /*-- fallback sorter: leaves the sorted order of the block's rotations in fmap --*/
+void fallbackSort ( UInt32* fmap,     /* out: fmap[0..nblock-1] receives the sorted order        */
+                    UInt32* eclass,   /* in/out: first nblock bytes hold the block; also key workspace */
+                    UInt32* bhtab,    /* workspace: bit vector of bucket-header flags            */
+                    Int32   nblock,   /* number of bytes in the block                            */
+                    Int32   verb )    /* verbosity; progress is printed when >= 4                */
+{
+   Int32 ftab[257];
+   Int32 ftabCopy[256];
+   Int32 H, i, j, k, l, r, cc, cc1;
+   Int32 nNotDone;
+   Int32 nBhtab;
+   UChar* eclass8 = (UChar*)eclass;
+
+   /*--
+      Initial 1-char radix sort to generate
+      initial fmap and initial BH bits.
+   --*/
+   if (verb >= 4)
+      VPrintf0 ( "        bucket sorting ...\n" );
+   for (i = 0; i < 257;    i++) ftab[i] = 0;
+   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
+   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
+   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
+   /*-- drop each position into its byte's bucket; afterwards ftab[c] is the start of bucket c --*/
+   for (i = 0; i < nblock; i++) {
+      j = eclass8[i];
+      k = ftab[j] - 1;
+      ftab[j] = k;
+      fmap[k] = i;
+   }
+   /*-- clear the bit vector, then flag the first slot of every bucket --*/
+   nBhtab = 2 + (nblock / 32);
+   for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
+   for (i = 0; i < 256; i++) SET_BH(ftab[i]);
+
+   /*--
+      Inductively refine the buckets.  Kind-of an
+      "exponential radix sort" (!), inspired by the
+      Manber-Myers suffix array construction algorithm.
+   --*/
+
+   /*-- set sentinel bits for block-end detection --*/
+   for (i = 0; i < 32; i++) { 
+      SET_BH(nblock + 2*i);
+      CLEAR_BH(nblock + 2*i + 1);
+   }
+
+   /*-- the log(N) loop --*/
+   H = 1;
+   while (1) {
+
+      if (verb >= 4) 
+         VPrintf1 ( "        depth %6d has ", H );
+      /*-- eclass[p] := header index of the bucket that holds position p+H (mod nblock) --*/
+      j = 0;
+      for (i = 0; i < nblock; i++) {
+         if (ISSET_BH(i)) j = i;
+         k = fmap[i] - H; if (k < 0) k += nblock;
+         eclass[k] = j;
+      }
+      /*-- r tracks the right end of the bucket handled last; -1 before the first one --*/
+      nNotDone = 0;
+      r = -1;
+      while (1) {
+
+         /*-- find the next non-singleton bucket --*/
+         k = r + 1;
+         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+         if (ISSET_BH(k)) {
+            while (WORD_BH(k) == 0xffffffff) k += 32;   /* skip whole words of set bits */
+            while (ISSET_BH(k)) k++;
+         }
+         l = k - 1;
+         if (l >= nblock) break;
+         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+         if (!ISSET_BH(k)) {
+            while (WORD_BH(k) == 0x00000000) k += 32;   /* skip whole words of clear bits */
+            while (!ISSET_BH(k)) k++;
+         }
+         r = k - 1;
+         if (r >= nblock) break;
+
+         /*-- now [l, r] bracket current bucket --*/
+         if (r > l) {
+            nNotDone += (r - l + 1);
+            fallbackQSort3 ( fmap, eclass, l, r );
+
+            /*-- scan bucket and generate header bits-- */
+            cc = -1;
+            for (i = l; i <= r; i++) {
+               cc1 = eclass[fmap[i]];
+               if (cc != cc1) { SET_BH(i); cc = cc1; };
+            }
+         }
+      }
+
+      if (verb >= 4) 
+         VPrintf1 ( "%6d unresolved strings\n", nNotDone );
+      /*-- double the depth; stop once no multi-entry bucket was found or H exceeds nblock --*/
+      H *= 2;
+      if (H > nblock || nNotDone == 0) break;
+   }
+
+   /*-- 
+      Reconstruct the original block in
+      eclass8 [0 .. nblock-1], since the
+      previous phase destroyed it.
+   --*/
+   if (verb >= 4)
+      VPrintf0 ( "        reconstructing block ...\n" );
+   j = 0;
+   for (i = 0; i < nblock; i++) {
+      while (ftabCopy[j] == 0) j++;
+      ftabCopy[j]--;
+      eclass8[fmap[i]] = (UChar)j;
+   }
+   AssertH ( j < 256, 1005 );
+}
+
+#undef       SET_BH
+#undef     CLEAR_BH
+#undef     ISSET_BH
+#undef      WORD_BH
+#undef UNALIGNED_BH
+
+
+/*---------------------------------------------*/
+/*--- The main, O(N^2 log(N)) sorting       ---*/
+/*--- algorithm.  Faster for "normal"       ---*/
+/*--- non-repetitive blocks.                ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
+static   /* True iff the bytes starting at block[i1] order after those starting at block[i2] */
+__inline__
+Bool mainGtU ( UInt32  i1,       /* start of the first string (index into block/quadrant)   */
+               UInt32  i2,       /* start of the second string; AssertD expects i1 != i2    */
+               UChar*  block,    /* bytes being compared                                    */
+               UInt16* quadrant, /* tie-break value per position, consulted in the loop     */
+               UInt32  nblock,   /* i1/i2 are wrapped modulo this after each loop pass      */
+               Int32*  budget )  /* work counter, decremented once per loop pass            */
+{
+   Int32  k;
+   UChar  c1, c2;
+   UInt16 s1, s2;
+
+   AssertD ( i1 != i2, "mainGtU" );
+   /* 1 -- the first 12 positions are compared on block[] alone, with no wrap check */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 2 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 3 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 4 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 5 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 6 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 7 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 8 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 9 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 10 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 11 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 12 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+
+   k = nblock + 8;   /* loop runs while k >= 0 in steps of 8, i.e. a little over nblock positions */
+
+   do {
+      /* 1 -- from here on, equal bytes are tie-broken on quadrant[] at the same position */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 2 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 3 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 4 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 5 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 6 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 7 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 8 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+
+      if (i1 >= nblock) i1 -= nblock;   /* wrap only once per 8 positions; reads in between may index past nblock */
+      if (i2 >= nblock) i2 -= nblock;
+
+      k -= 8;
+      (*budget)--;   /* charge one unit of work per 8 positions compared */
+   }
+      while (k >= 0);
+
+   return False;   /* no difference found within the scanned range */
+}
+
+
+/*---------------------------------------------*/
+/*--
+   Knuth's increments seem to work better
+   than Incerpi-Sedgewick here.  Possibly
+   because the number of elems to sort is
+   usually small, typically <= 20.
+--*/
+static
+Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,   /* each entry is 3*previous + 1 */
+                   9841, 29524, 88573, 265720,
+                   797161, 2391484 };
+
+static   /* Shell sort of ptr[lo..hi] using the gaps in incs[], ordering entries with mainGtU at offset d */
+void mainSimpleSort ( UInt32* ptr,       /* array of block indices, permuted in place */
+                      UChar*  block,     /* passed through to mainGtU                 */
+                      UInt16* quadrant,  /* passed through to mainGtU                 */
+                      Int32   nblock,    /* passed through to mainGtU                 */
+                      Int32   lo,        /* first index of the range to sort          */
+                      Int32   hi,        /* last index of the range (inclusive)       */
+                      Int32   d,         /* added to each ptr[] value before comparing */
+                      Int32*  budget )   /* work counter shared with mainGtU          */
+{
+   Int32 i, j, h, bigN, hp;
+   UInt32 v;
+
+   bigN = hi - lo + 1;       /* number of elements in the range */
+   if (bigN < 2) return;     /* nothing to sort */
+
+   hp = 0;
+   while (incs[hp] < bigN) hp++;   /* NOTE(review): no bound on hp; relies on bigN <= incs[13] */
+   hp--;                           /* start from the largest gap below bigN */
+
+   for (; hp >= 0; hp--) {   /* one gapped insertion-sort pass per increment, largest first */
+      h = incs[hp];
+
+      i = lo + h;
+      while (True) {
+
+         /*-- copy 1 (the insertion step is unrolled three times) --*/
+         if (i > hi) break;
+         v = ptr[i];
+         j = i;
+         while ( mainGtU ( 
+                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
+                 ) ) {
+            ptr[j] = ptr[j-h];
+            j = j - h;
+            if (j <= (lo + h - 1)) break;   /* j-h would fall below lo */
+         }
+         ptr[j] = v;
+         i++;
+
+         /*-- copy 2 --*/
+         if (i > hi) break;
+         v = ptr[i];
+         j = i;
+         while ( mainGtU ( 
+                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
+                 ) ) {
+            ptr[j] = ptr[j-h];
+            j = j - h;
+            if (j <= (lo + h - 1)) break;
+         }
+         ptr[j] = v;
+         i++;
+
+         /*-- copy 3 --*/
+         if (i > hi) break;
+         v = ptr[i];
+         j = i;
+         while ( mainGtU ( 
+                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
+                 ) ) {
+            ptr[j] = ptr[j-h];
+            j = j - h;
+            if (j <= (lo + h - 1)) break;
+         }
+         ptr[j] = v;
+         i++;
+
+         if (*budget < 0) return;   /* budget exhausted: abandon the sort (checked once per three insertions) */
+      }
+   }
+}
+
+
+/*---------------------------------------------*/
+/*--
+   The following is an implementation of
+   an elegant 3-way quicksort for strings,
+   described in a paper "Fast Algorithms for
+   Sorting and Searching Strings", by Robert
+   Sedgewick and Jon L. Bentley.
+--*/
+
+#define mswap(zz1, zz2) /* exchange two lvalues through an Int32 temporary */ \
+   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+
+#define mvswap(zzp1, zzp2, zzn) /* swap zzn entries of the caller's ptr[] starting at zzp1 with those at zzp2 */ \
+{                                     \
+   Int32 yyp1 = (zzp1);               \
+   Int32 yyp2 = (zzp2);               \
+   Int32 yyn  = (zzn);                \
+   while (yyn > 0) {                  \
+      mswap(ptr[yyp1], ptr[yyp2]);    \
+      yyp1++; yyp2++; yyn--;          \
+   }                                  \
+}
+
+static   /* returns the median of the three bytes a, b, c */
+__inline__
+UChar mmed3 ( UChar a, UChar b, UChar c )
+{
+   UChar t;
+   if (a > b) { t = a; a = b; b = t; };   /* now a <= b */
+   if (b > c) {                           /* c is not the largest, so the median is max(a, c) */
+      b = c;
+      if (a > b) b = a;
+   }
+   return b;
+}
+
+#define mmin(a,b) (((a) < (b)) ? (a) : (b))   /* smaller of a and b; fully parenthesised; arguments may be evaluated twice */
+
+#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
+                          stackHi[sp] = hz; \
+                          stackD [sp] = dz; \
+                          sp++; }   /* push a (lo, hi, depth) triple on the caller's explicit stack */
+
+#define mpop(lz,hz,dz) { sp--;             \
+                         lz = stackLo[sp]; \
+                         hz = stackHi[sp]; \
+                         dz = stackD [sp]; }   /* pop the most recently pushed triple */
+
+
+#define mnextsize(az) (nextHi[az]-nextLo[az])   /* hi - lo of pending range az (element count minus one) */
+
+#define mnextswap(az,bz)                                        \
+   { Int32 tz;                                                  \
+     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
+     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
+     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }   /* exchange pending ranges az and bz */
+
+
+#define MAIN_QSORT_SMALL_THRESH 20   /* ranges with hi - lo below this are handed to mainSimpleSort */
+#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)   /* so are ranges whose depth d exceeds this */
+#define MAIN_QSORT_STACK_SIZE 100   /* capacity of stackLo / stackHi / stackD in mainQSort3 */
+
+static   /* iterative 3-way quicksort of ptr[loSt..hiSt], keyed on block[ptr[i]+d], starting at depth dSt */
+void mainQSort3 ( UInt32* ptr,
+                  UChar*  block,
+                  UInt16* quadrant,
+                  Int32   nblock,
+                  Int32   loSt,      /* first index of the range to sort */
+                  Int32   hiSt,      /* last index of the range (inclusive) */
+                  Int32   dSt,       /* byte offset at which comparison starts */
+                  Int32*  budget )   /* work counter; the sort stops early once it goes negative */
+{
+   Int32 unLo, unHi, ltLo, gtHi, n, m, med;
+   Int32 sp, lo, hi, d;
+
+   Int32 stackLo[MAIN_QSORT_STACK_SIZE];   /* explicit stack of pending (lo, hi, depth) ranges */
+   Int32 stackHi[MAIN_QSORT_STACK_SIZE];
+   Int32 stackD [MAIN_QSORT_STACK_SIZE];
+
+   Int32 nextLo[3];   /* the three sub-ranges produced by one partition step */
+   Int32 nextHi[3];
+   Int32 nextD [3];
+
+   sp = 0;
+   mpush ( loSt, hiSt, dSt );
+
+   while (sp > 0) {
+
+      AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );   /* each pass pops one range and may push three */
+
+      mpop ( lo, hi, d );
+      if (hi - lo < MAIN_QSORT_SMALL_THRESH || 
+          d > MAIN_QSORT_DEPTH_THRESH) {
+         mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );   /* small or deep range */
+         if (*budget < 0) return;
+         continue;
+      }
+
+      med = (Int32) 
+            mmed3 ( block[ptr[ lo         ]+d],
+                    block[ptr[ hi         ]+d],
+                    block[ptr[ (lo+hi)>>1 ]+d] );   /* pivot: median of first, last and middle keys */
+
+      unLo = ltLo = lo;
+      unHi = gtHi = hi;
+
+      while (True) {
+         while (True) {   /* scan up; keys equal to med are parked at the left end (below ltLo) */
+            if (unLo > unHi) break;
+            n = ((Int32)block[ptr[unLo]+d]) - med;
+            if (n == 0) { 
+               mswap(ptr[unLo], ptr[ltLo]); 
+               ltLo++; unLo++; continue; 
+            };
+            if (n >  0) break;
+            unLo++;
+         }
+         while (True) {   /* scan down; keys equal to med are parked at the right end (above gtHi) */
+            if (unLo > unHi) break;
+            n = ((Int32)block[ptr[unHi]+d]) - med;
+            if (n == 0) { 
+               mswap(ptr[unHi], ptr[gtHi]); 
+               gtHi--; unHi--; continue; 
+            };
+            if (n <  0) break;
+            unHi--;
+         }
+         if (unLo > unHi) break;
+         mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
+      }
+
+      AssertD ( unHi == unLo-1, "mainQSort3(2)" );
+
+      if (gtHi < ltLo) {   /* every key equalled med: retry the same range one byte deeper */
+         mpush(lo, hi, d+1 );
+         continue;
+      }
+
+      n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);     /* move the parked equal keys ... */
+      m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);   /* ... from both ends into the middle */
+
+      n = lo + unLo - ltLo - 1;
+      m = hi - (gtHi - unHi) + 1;
+
+      nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;     /* [lo, n] keeps depth d */
+      nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;     /* [m, hi] keeps depth d */
+      nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;   /* the middle [n+1, m-1] advances to depth d+1 */
+
+      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);   /* order the three by size, largest first */
+      if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
+      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+
+      AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
+      AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
+
+      mpush (nextLo[0], nextHi[0], nextD[0]);   /* largest pushed first, so the smallest is popped next */
+      mpush (nextLo[1], nextHi[1], nextD[1]);
+      mpush (nextLo[2], nextHi[2], nextD[2]);
+   }
+}
+
+#undef mswap
+#undef mvswap
+#undef mpush
+#undef mpop
+#undef mmin
+#undef mnextsize
+#undef mnextswap
+#undef MAIN_QSORT_SMALL_THRESH
+#undef MAIN_QSORT_DEPTH_THRESH
+#undef MAIN_QSORT_STACK_SIZE
+
+
+/*---------------------------------------------*/
+/* Pre:
+      nblock > BZ_N_OVERSHOOT
+      block and quadrant exist for [0 .. nblock-1 +BZ_N_OVERSHOOT]
+      block [0 .. nblock-1] holds the data to be sorted
+      ptr exists for [0 .. nblock-1]
+
+   Post:
+      block [0 .. nblock-1] still holds the data
+      The overshoot area of block and all of quadrant are overwritten
+      ftab [0 .. 65536 ] destroyed
+      ptr [0 .. nblock-1] holds sorted order
+      if (*budget < 0), sorting was abandoned
+*/
+
+#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])   /* difference between the ftab entries that start big buckets b+1 and b */
+#define SETMASK (1 << 21)   /* flag bit OR-ed into an ftab entry once that small bucket is sorted */
+#define CLEARMASK (~(SETMASK))   /* AND with this to strip the flag and recover the stored position */
+
+static   /* radix sort on 2-byte prefixes, then per big bucket: mainQSort3 on small buckets plus pointer copying */
+void mainSort ( UInt32* ptr, 
+                UChar*  block,
+                UInt16* quadrant, 
+                UInt32* ftab,
+                Int32   nblock,
+                Int32   verb,
+                Int32*  budget )
+{
+   Int32  i, j, k, ss, sb;
+   Int32  runningOrder[256];
+   Bool   bigDone[256];
+   Int32  copyStart[256];
+   Int32  copyEnd  [256];
+   UChar  c1;
+   Int32  numQSorted;
+   UInt16 s;
+   if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
+
+   /*-- set up the 2-byte frequency table --*/
+   for (i = 65536; i >= 0; i--) ftab[i] = 0;   /* 65537 entries, indices 0 .. 65536 */
+
+   j = block[0] << 8;   /* so the first j below pairs block[nblock-1] with block[0] */
+   i = nblock-1;
+   for (; i >= 3; i -= 4) {   /* unrolled x4: count each pair (block[i] << 8) | following byte, clearing quadrant */
+      quadrant[i] = 0;
+      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+      ftab[j]++;
+      quadrant[i-1] = 0;
+      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
+      ftab[j]++;
+      quadrant[i-2] = 0;
+      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
+      ftab[j]++;
+      quadrant[i-3] = 0;
+      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
+      ftab[j]++;
+   }
+   for (; i >= 0; i--) {   /* remaining 0..3 positions */
+      quadrant[i] = 0;
+      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+      ftab[j]++;
+   }
+
+   /*-- (emphasises close relationship of block & quadrant) --*/
+   for (i = 0; i < BZ_N_OVERSHOOT; i++) {
+      block   [nblock+i] = block[i];   /* replicate the block start past the end */
+      quadrant[nblock+i] = 0;
+   }
+
+   if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
+
+   /*-- Complete the initial radix sort --*/
+   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];   /* running totals: ftab[s] = one past the end of small bucket s */
+
+   s = block[0] << 8;
+   i = nblock-1;
+   for (; i >= 3; i -= 4) {   /* drop each index into its small bucket, filling buckets from the end */
+      s = (s >> 8) | (block[i] << 8);
+      j = ftab[s] -1;
+      ftab[s] = j;
+      ptr[j] = i;
+      s = (s >> 8) | (block[i-1] << 8);
+      j = ftab[s] -1;
+      ftab[s] = j;
+      ptr[j] = i-1;
+      s = (s >> 8) | (block[i-2] << 8);
+      j = ftab[s] -1;
+      ftab[s] = j;
+      ptr[j] = i-2;
+      s = (s >> 8) | (block[i-3] << 8);
+      j = ftab[s] -1;
+      ftab[s] = j;
+      ptr[j] = i-3;
+   }
+   for (; i >= 0; i--) {
+      s = (s >> 8) | (block[i] << 8);
+      j = ftab[s] -1;
+      ftab[s] = j;
+      ptr[j] = i;
+   }
+
+   /*--
+      Now ftab contains the first loc of every small bucket.
+      Calculate the running order, from smallest to largest
+      big bucket.
+   --*/
+   for (i = 0; i <= 255; i++) {
+      bigDone     [i] = False;
+      runningOrder[i] = i;
+   }
+
+   {
+      Int32 vv;
+      Int32 h = 1;
+      do h = 3 * h + 1; while (h <= 256);   /* Shell sort of runningOrder[] by BIGFREQ, gaps h = 3h+1 */
+      do {
+         h = h / 3;
+         for (i = h; i <= 255; i++) {
+            vv = runningOrder[i];
+            j = i;
+            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
+               runningOrder[j] = runningOrder[j-h];
+               j = j - h;
+               if (j <= (h - 1)) goto zero;
+            }
+            zero:
+            runningOrder[j] = vv;
+         }
+      } while (h != 1);
+   }
+
+   /*--
+      The main sorting loop.
+   --*/
+
+   numQSorted = 0;
+
+   for (i = 0; i <= 255; i++) {
+
+      /*--
+         Process big buckets, starting with the least full.
+         Basically this is a 3-step process in which we call
+         mainQSort3 to sort the small buckets [ss, j], but
+         also make a big effort to avoid the calls if we can.
+      --*/
+      ss = runningOrder[i];
+
+      /*--
+         Step 1:
+         Complete the big bucket [ss] by quicksorting
+         any unsorted small buckets [ss, j], for j != ss.  
+         Hopefully previous pointer-scanning phases have already
+         completed many of the small buckets [ss, j], so
+         we don't have to sort them at all.
+      --*/
+      for (j = 0; j <= 255; j++) {
+         if (j != ss) {
+            sb = (ss << 8) + j;
+            if ( ! (ftab[sb] & SETMASK) ) {   /* small bucket not yet marked as sorted */
+               Int32 lo = ftab[sb]   & CLEARMASK;
+               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
+               if (hi > lo) {
+                  if (verb >= 4)
+                     VPrintf4 ( "        qsort [0x%x, 0x%x]   "
+                                "done %d   this %d\n",
+                                ss, j, numQSorted, hi - lo + 1 );
+                  mainQSort3 ( 
+                     ptr, block, quadrant, nblock, 
+                     lo, hi, BZ_N_RADIX, budget 
+                  );   
+                  numQSorted += (hi - lo + 1);
+                  if (*budget < 0) return;   /* abandoned: ptr[] is left incompletely sorted */
+               }
+            }
+            ftab[sb] |= SETMASK;   /* mark small bucket [ss, j] as sorted */
+         }
+      }
+
+      AssertH ( !bigDone[ss], 1006 );
+
+      /*--
+         Step 2:
+         Now scan this big bucket [ss] so as to synthesise the
+         sorted order for small buckets [t, ss] for all t,
+         including, magically, the bucket [ss,ss] too.
+         This will avoid doing Real Work in subsequent Step 1's.
+      --*/
+      {
+         for (j = 0; j <= 255; j++) {
+            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
+            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
+         }
+         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {   /* forward scan fills buckets from the front */
+            k = ptr[j]-1; if (k < 0) k += nblock;
+            c1 = block[k];
+            if (!bigDone[c1])
+               ptr[ copyStart[c1]++ ] = k;
+         }
+         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {   /* backward scan fills them from the back */
+            k = ptr[j]-1; if (k < 0) k += nblock;
+            c1 = block[k];
+            if (!bigDone[c1]) 
+               ptr[ copyEnd[c1]-- ] = k;
+         }
+      }
+
+      AssertH ( (copyStart[ss]-1 == copyEnd[ss])
+                || 
+                /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
+                   Necessity for this case is demonstrated by compressing 
+                   a sequence of approximately 48.5 million of character 
+                   251; 1.0.0/1.0.1 will then die here. */
+                (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
+                1007 )
+
+      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;   /* mark every small bucket [j, ss] as sorted */
+
+      /*--
+         Step 3:
+         The [ss] big bucket is now done.  Record this fact,
+         and update the quadrant descriptors.  Remember to
+         update quadrants in the overshoot area too, if
+         necessary.  The "if (i < 255)" test merely skips
+         this updating for the last bucket processed, since
+         updating for the last bucket is pointless.
+
+         The quadrant array provides a way to incrementally
+         cache sort orderings, as they appear, so as to 
+         make subsequent comparisons in mainGtU() complete
+         faster.  For repetitive blocks this makes a big
+         difference (but not big enough to be able to avoid
+         the fallback sorting mechanism, exponential radix sort).
+
+         The precise meaning is: at all times:
+
+            for 0 <= i < nblock and 0 <= j <= nblock
+
+            if block[i] != block[j], 
+
+               then the relative values of quadrant[i] and 
+                    quadrant[j] are meaningless.
+
+               else {
+                  if quadrant[i] < quadrant[j]
+                     then the string starting at i lexicographically
+                     precedes the string starting at j
+
+                  else if quadrant[i] > quadrant[j]
+                     then the string starting at j lexicographically
+                     precedes the string starting at i
+
+                  else
+                     the relative ordering of the strings starting
+                     at i and j has not yet been determined.
+               }
+      --*/
+      bigDone[ss] = True;
+
+      if (i < 255) {
+         Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
+         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
+         Int32 shifts   = 0;
+
+         while ((bbSize >> shifts) > 65534) shifts++;   /* scale ranks down until they fit in a UInt16 */
+
+         for (j = bbSize-1; j >= 0; j--) {
+            Int32 a2update     = ptr[bbStart + j];
+            UInt16 qVal        = (UInt16)(j >> shifts);
+            quadrant[a2update] = qVal;
+            if (a2update < BZ_N_OVERSHOOT)
+               quadrant[a2update + nblock] = qVal;   /* keep the overshoot copy in step */
+         }
+         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
+      }
+
+   }
+
+   if (verb >= 4)
+      VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
+                 nblock, numQSorted, nblock - numQSorted );
+}
+
+#undef BIGFREQ
+#undef SETMASK
+#undef CLEARMASK
+
+
+/*---------------------------------------------*/
+/* Pre:
+      nblock > 0
+      arr2 exists for [0 .. nblock-1 +BZ_N_OVERSHOOT]
+      ((UChar*)arr2)  [0 .. nblock-1] holds block
+      arr1 exists for [0 .. nblock-1]
+
+   Post:
+      ((UChar*)arr2) [0 .. nblock-1] holds block
+      All other areas of arr2 destroyed
+      ftab [ 0 .. 65536 ] destroyed
+      arr1 [0 .. nblock-1] holds sorted order; s->origPtr is set
+*/
+void BZ2_blockSort ( EState* s )   /* sort the block in s, then store in s->origPtr the sorted position of block index 0 */
+{
+   UInt32* ptr    = s->ptr; 
+   UChar*  block  = s->block;
+   UInt32* ftab   = s->ftab;
+   Int32   nblock = s->nblock;
+   Int32   verb   = s->verbosity;
+   Int32   wfact  = s->workFactor;
+   UInt16* quadrant;
+   Int32   budget;
+   Int32   budgetInit;
+   Int32   i;
+
+   if (nblock < 10000) {   /* small blocks go straight to fallbackSort */
+      fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
+   } else {
+      /* Calculate the location for quadrant, remembering to get
+         the alignment right.  Assumes that &(block[0]) is at least
+         2-byte aligned -- this should be ok since block is really
+         the first section of arr2.
+      */
+      i = nblock+BZ_N_OVERSHOOT;
+      if (i & 1) i++;   /* round up to an even byte offset */
+      quadrant = (UInt16*)(&(block[i]));   /* quadrant starts just after the block bytes plus overshoot */
+
+      /* (wfact-1) / 3 puts the default-factor-30
+         transition point at very roughly the same place as 
+         with v0.1 and v0.9.0.  
+         Not that it particularly matters any more, since the
+         resulting compressed stream is now the same regardless
+         of whether or not we use the main sort or fallback sort.
+      */
+      if (wfact < 1  ) wfact = 1;     /* clamp wfact to 1 .. 100 */
+      if (wfact > 100) wfact = 100;
+      budgetInit = nblock * ((wfact-1) / 3);   /* integer division: wfact 1..3 yields a zero budget */
+      budget = budgetInit;
+
+      mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
+      if (verb >= 3) 
+         VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
+                    budgetInit - budget,
+                    nblock, 
+                    (float)(budgetInit - budget) /
+                    (float)(nblock==0 ? 1 : nblock) ); 
+      if (budget < 0) {   /* mainSort gave up: redo the whole sort with fallbackSort */
+         if (verb >= 2) 
+            VPrintf0 ( "    too repetitive; using fallback"
+                       " sorting algorithm\n" );
+         fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
+      }
+   }
+
+   s->origPtr = -1;
+   for (i = 0; i < s->nblock; i++)   /* linear search for the entry that points at block index 0 */
+      if (ptr[i] == 0)
+         { s->origPtr = i; break; };
+
+   AssertH( s->origPtr != -1, 1003 );   /* index 0 must appear somewhere in ptr[] */
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                       blocksort.c ---*/
+/*-------------------------------------------------------------*/
diff --git a/libraries/bzip2/bzlib.c b/libraries/bzip2/bzlib.c
new file mode 100644
index 000000000..21786551b
--- /dev/null
+++ b/libraries/bzip2/bzlib.c
@@ -0,0 +1,1572 @@
+
+/*-------------------------------------------------------------*/
+/*--- Library top-level functions.                          ---*/
+/*---                                               bzlib.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+   This file is part of bzip2/libbzip2, a program and library for
+   lossless, block-sorting data compression.
+
+   bzip2/libbzip2 version 1.0.8 of 13 July 2019
+   Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
+
+   Please read the WARNING, DISCLAIMER and PATENTS sections in the 
+   README file.
+
+   This program is released under the terms of the license contained
+   in the file LICENSE.
+   ------------------------------------------------------------------ */
+
+/* CHANGES
+   0.9.0    -- original version.
+   0.9.0a/b -- no changes in this file.
+   0.9.0c   -- made zero-length BZ_FLUSH work correctly in bzCompress().
+     fixed bzWrite/bzRead to ignore zero-length requests.
+     fixed bzread to correctly handle read requests after EOF.
+     wrong parameter order in call to bzDecompressInit in
+     bzBuffToBuffDecompress.  Fixed.
+*/
+
+#include "bzlib_private.h"
+
+
+/*---------------------------------------------------*/
+/*--- Compression stuff                           ---*/
+/*---------------------------------------------------*/
+
+
+/*---------------------------------------------------*/
+#ifndef BZ_NO_STDIO   /* this handler uses fprintf/stderr, so it is compiled only when BZ_NO_STDIO is not defined */
+void BZ2_bz__AssertH__fail ( int errcode )   /* print a report for internal error `errcode` on stderr, then exit(3) */
+{
+   fprintf(stderr, 
+      "\n\nbzip2/libbzip2: internal error number %d.\n"
+      "This is a bug in bzip2/libbzip2, %s.\n"
+      "Please report it to: bzip2-devel@sourceware.org.  If this happened\n"
+      "when you were using some program which uses libbzip2 as a\n"
+      "component, you should also report this bug to the author(s)\n"
+      "of that program.  Please make an effort to report this bug;\n"
+      "timely and accurate bug reports eventually lead to higher\n"
+      "quality software.  Thanks.\n\n",
+      errcode,
+      BZ2_bzlibVersion()
+   );
+
+   if (errcode == 1007) {   /* extra advice for assertion 1007, which the text below attributes mostly to bad memory */
+   fprintf(stderr,
+      "\n*** A special note about internal error number 1007 ***\n"
+      "\n"
+      "Experience suggests that a common cause of i.e. 1007\n"
+      "is unreliable memory or other hardware.  The 1007 assertion\n"
+      "just happens to cross-check the results of huge numbers of\n"
+      "memory reads/writes, and so acts (unintendedly) as a stress\n"
+      "test of your memory system.\n"
+      "\n"
+      "I suggest the following: try compressing the file again,\n"
+      "possibly monitoring progress in detail with the -vv flag.\n"
+      "\n"
+      "* If the error cannot be reproduced, and/or happens at different\n"
+      "  points in compression, you may have a flaky memory system.\n"
+      "  Try a memory-test program.  I have used Memtest86\n"
+      "  (www.memtest86.com).  At the time of writing it is free (GPLd).\n"
+      "  Memtest86 tests memory much more thorougly than your BIOSs\n"
+      "  power-on test, and may find failures that the BIOS doesn't.\n"
+      "\n"
+      "* If the error can be repeatably reproduced, this is a bug in\n"
+      "  bzip2, and I would very much like to hear about it.  Please\n"
+      "  let me know, and, ideally, save a copy of the file causing the\n"
+      "  problem -- without which I will be unable to investigate it.\n"
+      "\n"
+   );
+   }
+
+   exit(3);   /* terminates the process; this function does not return */
+}
+#endif
+
+
+/*---------------------------------------------------*/
+static   /* returns 1 iff int, short and char are 4, 2 and 1 bytes wide, else 0 */
+int bz_config_ok ( void )
+{
+   if (sizeof(int)   != 4) return 0;
+   if (sizeof(short) != 2) return 0;
+   if (sizeof(char)  != 1) return 0;
+   return 1;
+}
+
+
+/*---------------------------------------------------*/
+static   /* default allocation hook: malloc of items*size bytes; `opaque` is ignored; returns NULL on failure */
+void* default_bzalloc ( void* opaque, Int32 items, Int32 size )
+{
+   void* v = malloc ( (size_t)items * (size_t)size );   /* multiply in size_t: the Int32 product could overflow (undefined behaviour) */
+   return v;
+}
+
+static   /* default free hook paired with default_bzalloc; `opaque` is ignored */
+void default_bzfree ( void* opaque, void* addr )
+{
+   if (addr != NULL) free ( addr );   /* NULL is skipped explicitly */
+}
+
+
+/*---------------------------------------------------*/
+static   /* reset the per-block fields of s and advance the block counter */
+void prepare_new_block ( EState* s )
+{
+   Int32 i;
+   s->nblock = 0;          /* no bytes stored in the block yet */
+   s->numZ = 0;
+   s->state_out_pos = 0;
+   BZ_INITIALISE_CRC ( s->blockCRC );
+   for (i = 0; i < 256; i++) s->inUse[i] = False;   /* no byte value seen yet in this block */
+   s->blockNo++;
+}
+
+
+/*---------------------------------------------------*/
+static   /* reset the run-length state to "no pending run" */
+void init_RL ( EState* s )
+{
+   s->state_in_ch  = 256;   /* 256 is outside the byte range; other code tests state_in_ch < 256 for a real byte */
+   s->state_in_len = 0;
+}
+
+
+static   /* True when no run is pending; False when state_in_ch is a real byte with a non-zero length */
+Bool isempty_RL ( EState* s )
+{
+   if (s->state_in_ch < 256 && s->state_in_len > 0)
+      return False; else
+      return True;
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzCompressInit)   /* allocate and initialise compression state for strm; returns BZ_OK or a BZ_*_ERROR code */
+                    ( bz_stream* strm,           /* stream to attach the new state to; must not be NULL */
+                     int        blockSize100k,   /* accepted range 1 .. 9                               */
+                     int        verbosity,       /* stored in the state unchecked                       */
+                     int        workFactor )     /* accepted range 0 .. 250; 0 selects 30               */
+{
+   Int32   n;
+   EState* s;
+
+   if (!bz_config_ok()) return BZ_CONFIG_ERROR;   /* integer type sizes are not the expected ones */
+
+   if (strm == NULL || 
+       blockSize100k < 1 || blockSize100k > 9 ||
+       workFactor < 0 || workFactor > 250)
+     return BZ_PARAM_ERROR;
+
+   if (workFactor == 0) workFactor = 30;   /* 0 means "use the default" */
+   if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;   /* install the default hooks if the caller gave none */
+   if (strm->bzfree == NULL) strm->bzfree = default_bzfree;
+
+   s = BZALLOC( sizeof(EState) );
+   if (s == NULL) return BZ_MEM_ERROR;
+   s->strm = strm;
+
+   s->arr1 = NULL;
+   s->arr2 = NULL;
+   s->ftab = NULL;
+
+   n       = 100000 * blockSize100k;
+   s->arr1 = BZALLOC( n                  * sizeof(UInt32) );
+   s->arr2 = BZALLOC( (n+BZ_N_OVERSHOOT) * sizeof(UInt32) );
+   s->ftab = BZALLOC( 65537              * sizeof(UInt32) );
+
+   if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) {   /* any failure: release whatever was obtained */
+      if (s->arr1 != NULL) BZFREE(s->arr1);
+      if (s->arr2 != NULL) BZFREE(s->arr2);
+      if (s->ftab != NULL) BZFREE(s->ftab);
+      if (s       != NULL) BZFREE(s);
+      return BZ_MEM_ERROR;
+   }
+
+   s->blockNo           = 0;
+   s->state             = BZ_S_INPUT;
+   s->mode              = BZ_M_RUNNING;
+   s->combinedCRC       = 0;
+   s->blockSize100k     = blockSize100k;
+   s->nblockMAX         = 100000 * blockSize100k - 19;   /* 19 bytes below the nominal block size */
+   s->verbosity         = verbosity;
+   s->workFactor        = workFactor;
+
+   s->block             = (UChar*)s->arr2;    /* block is a byte view of arr2 */
+   s->mtfv              = (UInt16*)s->arr1;   /* mtfv and ptr are both views of arr1 */
+   s->zbits             = NULL;
+   s->ptr               = (UInt32*)s->arr1;
+
+   strm->state          = s;
+   strm->total_in_lo32  = 0;
+   strm->total_in_hi32  = 0;
+   strm->total_out_lo32 = 0;
+   strm->total_out_hi32 = 0;
+   init_RL ( s );
+   prepare_new_block ( s );
+   return BZ_OK;
+}
+
+
+/*---------------------------------------------------*/
+static   /* append the pending run (state_in_ch repeated state_in_len times) to s->block in run-length form */
+void add_pair_to_block ( EState* s )
+{
+   Int32 i;
+   UChar ch = (UChar)(s->state_in_ch);
+   for (i = 0; i < s->state_in_len; i++) {   /* the CRC is updated once per original byte of the run */
+      BZ_UPDATE_CRC( s->blockCRC, ch );
+   }
+   s->inUse[s->state_in_ch] = True;
+   switch (s->state_in_len) {
+      case 1:   /* runs of 1 to 3 are stored literally */
+         s->block[s->nblock] = (UChar)ch; s->nblock++;
+         break;
+      case 2:
+         s->block[s->nblock] = (UChar)ch; s->nblock++;
+         s->block[s->nblock] = (UChar)ch; s->nblock++;
+         break;
+      case 3:
+         s->block[s->nblock] = (UChar)ch; s->nblock++;
+         s->block[s->nblock] = (UChar)ch; s->nblock++;
+         s->block[s->nblock] = (UChar)ch; s->nblock++;
+         break;
+      default:   /* any other length: four copies of ch followed by a count byte of (length - 4) */
+         s->inUse[s->state_in_len-4] = True;   /* the count byte is stored in the block, so its value is marked used too */
+         s->block[s->nblock] = (UChar)ch; s->nblock++;
+         s->block[s->nblock] = (UChar)ch; s->nblock++;
+         s->block[s->nblock] = (UChar)ch; s->nblock++;
+         s->block[s->nblock] = (UChar)ch; s->nblock++;
+         s->block[s->nblock] = ((UChar)(s->state_in_len-4));
+         s->nblock++;
+         break;
+   }
+}
+
+
+/*---------------------------------------------------*/
+static   /* write out any pending run, then reset the run-length state */
+void flush_RL ( EState* s )
+{
+   if (s->state_in_ch < 256) add_pair_to_block ( s );   /* 256 is the "no pending byte" value set by init_RL */
+   init_RL ( s );
+}
+
+
+/*---------------------------------------------------*/
+#define ADD_CHAR_TO_BLOCK(zs,zchh0) /* feed one input byte zchh0 into the run-length state of zs */ \
+{                                                 \
+   UInt32 zchh = (UInt32)(zchh0);                 \
+   /*-- fast track: pending run has length 1 and the new byte differs, so store it directly --*/ \
+   if (zchh != zs->state_in_ch &&                 \
+       zs->state_in_len == 1) {                   \
+      UChar ch = (UChar)(zs->state_in_ch);        \
+      BZ_UPDATE_CRC( zs->blockCRC, ch );          \
+      zs->inUse[zs->state_in_ch] = True;          \
+      zs->block[zs->nblock] = (UChar)ch;          \
+      zs->nblock++;                               \
+      zs->state_in_ch = zchh;                     \
+   }                                              \
+   else                                           \
+   /*-- general case: the run ends because the byte differs or the length reached 255 --*/ \
+   if (zchh != zs->state_in_ch ||                 \
+      zs->state_in_len == 255) {                  \
+      if (zs->state_in_ch < 256)                  \
+         add_pair_to_block ( zs );                \
+      zs->state_in_ch = zchh;                     \
+      zs->state_in_len = 1;                       \
+   } else { /* same byte again: extend the run */ \
+      zs->state_in_len++;                         \
+   }                                              \
+}
+
+
+/*---------------------------------------------------*/
+/* Feed caller input into the block being assembled, one byte at a
+   time through ADD_CHAR_TO_BLOCK, keeping next_in, avail_in and the
+   two-word total_in counter in step.  Copying stops when the block is
+   full (nblock >= nblockMAX) or the caller's input is exhausted.
+   Outside BZ_M_RUNNING it also stops once avail_in_expect reaches
+   zero, and that counter is decremented for every byte taken.
+   Returns True iff at least one byte was consumed. */
+static
+Bool copy_input_until_stop ( EState* s )
+{
+   Bool consumed = False;
+
+   if (s->mode != BZ_M_RUNNING) {
+
+      /*-- flushing or finishing: also bounded by avail_in_expect --*/
+      while (s->nblock < s->nblockMAX &&
+             s->strm->avail_in != 0 &&
+             s->avail_in_expect != 0) {
+         consumed = True;
+         ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
+         s->strm->next_in++;
+         s->strm->avail_in--;
+         s->strm->total_in_lo32++;
+         if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
+         s->avail_in_expect--;
+      }
+
+   } else {
+
+      /*-- plain running mode: the common, fast path --*/
+      while (s->nblock < s->nblockMAX &&
+             s->strm->avail_in != 0) {
+         consumed = True;
+         ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
+         s->strm->next_in++;
+         s->strm->avail_in--;
+         s->strm->total_in_lo32++;
+         if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
+      }
+
+   }
+   return consumed;
+}
+
+
+/*---------------------------------------------------*/
+/* Copy compressed bytes from s->zbits[state_out_pos .. numZ-1] to the
+   caller's output buffer, advancing next_out, avail_out and the
+   two-word total_out counter.  Stops when the output buffer is full
+   or every byte up to numZ has been delivered.
+   Returns True iff at least one byte was written. */
+static
+Bool copy_output_until_stop ( EState* s )
+{
+   Bool delivered = False;
+
+   while (s->strm->avail_out != 0 &&
+          s->state_out_pos < s->numZ) {
+
+      delivered = True;
+      *(s->strm->next_out) = s->zbits[s->state_out_pos];
+      s->state_out_pos++;
+
+      s->strm->next_out++;
+      s->strm->avail_out--;
+
+      s->strm->total_out_lo32++;
+      if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+   }
+
+   return delivered;
+}
+
+
+/*---------------------------------------------------
+   Drive the compressor for one API call.  The stream alternates
+   between two states:
+     BZ_S_OUTPUT - compressed bytes held in s->zbits are copied to
+                   the caller's output buffer;
+     BZ_S_INPUT  - caller input is absorbed into the current block,
+                   which is compressed once it is full, or once a
+                   flush/finish has consumed all the input it expects.
+   The loop ends when no further step is possible: the output buffer
+   is full, the input is exhausted, or a flush/finish is complete.
+   Returns True iff any input was consumed or any output produced.
+  ---------------------------------------------------*/
+static
+Bool handle_compress ( bz_stream* strm )
+{
+   Bool progress_in  = False;
+   Bool progress_out = False;
+   EState* s = strm->state;
+   
+   while (True) {
+
+      if (s->state == BZ_S_OUTPUT) {
+         progress_out |= copy_output_until_stop ( s );
+         if (s->state_out_pos < s->numZ) break;   /* output not fully drained: stop here */
+         if (s->mode == BZ_M_FINISHING &&          /* finishing and nothing left to compress */
+             s->avail_in_expect == 0 &&
+             isempty_RL(s)) break;
+         prepare_new_block ( s );
+         s->state = BZ_S_INPUT;
+         if (s->mode == BZ_M_FLUSHING &&           /* flush complete: leave a fresh block ready */
+             s->avail_in_expect == 0 &&
+             isempty_RL(s)) break;
+      }
+
+      if (s->state == BZ_S_INPUT) {
+         progress_in |= copy_input_until_stop ( s );
+         if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) {
+            /* flush/finish has taken all its input: close the block now */
+            flush_RL ( s );
+            BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) );   /* flag is True only when finishing */
+            s->state = BZ_S_OUTPUT;
+         }
+         else
+         if (s->nblock >= s->nblockMAX) {          /* block full: compress it, flag False */
+            BZ2_compressBlock ( s, False );
+            s->state = BZ_S_OUTPUT;
+         }
+         else
+         if (s->strm->avail_in == 0) {             /* no more input for now */
+            break;
+         }
+      }
+
+   }
+
+   return progress_in || progress_out;
+}
+
+
+/*---------------------------------------------------
+   Public entry point: advance compression on strm as requested by
+   action, dispatching on the current mode s->mode.
+
+   BZ_PARAM_ERROR is returned when strm or its state is NULL, or the
+   state does not point back at strm.  Otherwise, by mode:
+     BZ_M_IDLE      - BZ_SEQUENCE_ERROR.
+     BZ_M_RUNNING   - BZ_RUN: BZ_RUN_OK if handle_compress() made
+                      progress, else BZ_PARAM_ERROR.
+                      BZ_FLUSH / BZ_FINISH: record strm->avail_in in
+                      avail_in_expect, switch mode and dispatch again.
+                      Any other action: BZ_PARAM_ERROR.
+     BZ_M_FLUSHING  - action must be BZ_FLUSH and avail_in must still
+                      equal avail_in_expect, else BZ_SEQUENCE_ERROR.
+                      BZ_FLUSH_OK while work remains; when done the
+                      mode returns to BZ_M_RUNNING and BZ_RUN_OK is
+                      returned.
+     BZ_M_FINISHING - same checks with BZ_FINISH; also
+                      BZ_SEQUENCE_ERROR when no progress was made.
+                      BZ_FINISH_OK while work remains; when done the
+                      mode becomes BZ_M_IDLE and BZ_STREAM_END is
+                      returned.
+  ---------------------------------------------------*/
+int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
+{
+   Bool progress;
+   EState* s;
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   s = strm->state;
+   if (s == NULL) return BZ_PARAM_ERROR;
+   if (s->strm != strm) return BZ_PARAM_ERROR;   /* state must belong to this stream */
+
+   preswitch:
+   switch (s->mode) {
+
+      case BZ_M_IDLE:
+         return BZ_SEQUENCE_ERROR;
+
+      case BZ_M_RUNNING:
+         if (action == BZ_RUN) {
+            progress = handle_compress ( strm );
+            return progress ? BZ_RUN_OK : BZ_PARAM_ERROR;
+         } 
+         else
+	 if (action == BZ_FLUSH) {
+            s->avail_in_expect = strm->avail_in;   /* input the flush must consume */
+            s->mode = BZ_M_FLUSHING;
+            goto preswitch;                        /* re-dispatch in the new mode */
+         }
+         else
+         if (action == BZ_FINISH) {
+            s->avail_in_expect = strm->avail_in;   /* input the finish must consume */
+            s->mode = BZ_M_FINISHING;
+            goto preswitch;                        /* re-dispatch in the new mode */
+         }
+         else 
+            return BZ_PARAM_ERROR;
+
+      case BZ_M_FLUSHING:
+         if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR;
+         if (s->avail_in_expect != s->strm->avail_in)   /* caller changed avail_in mid-flush */
+            return BZ_SEQUENCE_ERROR;
+         progress = handle_compress ( strm );
+         if (s->avail_in_expect > 0 || !isempty_RL(s) ||
+             s->state_out_pos < s->numZ) return BZ_FLUSH_OK;   /* input, run or output still pending */
+         s->mode = BZ_M_RUNNING;
+         return BZ_RUN_OK;
+
+      case BZ_M_FINISHING:
+         if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR;
+         if (s->avail_in_expect != s->strm->avail_in)   /* caller changed avail_in mid-finish */
+            return BZ_SEQUENCE_ERROR;
+         progress = handle_compress ( strm );
+         if (!progress) return BZ_SEQUENCE_ERROR;
+         if (s->avail_in_expect > 0 || !isempty_RL(s) ||
+             s->state_out_pos < s->numZ) return BZ_FINISH_OK;  /* input, run or output still pending */
+         s->mode = BZ_M_IDLE;
+         return BZ_STREAM_END;
+   }
+   return BZ_OK; /*--not reached--*/
+}
+
+
+/*---------------------------------------------------*/
+/* Release everything owned by a compression stream: the arr1, arr2
+   and ftab work areas (when allocated) and then the state itself,
+   all through the stream's bzfree hook.  strm->state is cleared.
+   Returns BZ_PARAM_ERROR when strm or its state is NULL or the state
+   does not point back at strm; BZ_OK otherwise. */
+int BZ_API(BZ2_bzCompressEnd)  ( bz_stream *strm )
+{
+   EState* es;
+
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   es = strm->state;
+   if (es == NULL || es->strm != strm) return BZ_PARAM_ERROR;
+
+   if (es->arr1 != NULL) { BZFREE(es->arr1); }
+   if (es->arr2 != NULL) { BZFREE(es->arr2); }
+   if (es->ftab != NULL) { BZFREE(es->ftab); }
+
+   BZFREE(strm->state);
+   strm->state = NULL;
+
+   return BZ_OK;
+}
+
+
+/*---------------------------------------------------*/
+/*--- Decompression stuff                         ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+/* Attach a freshly allocated decompression state to strm.
+   verbosity must lie in 0..4 and small must be 0 or 1.  Missing
+   bzalloc / bzfree hooks are replaced by the defaults before the
+   state is allocated.  The stream's total_in / total_out counters
+   are reset.
+   Returns BZ_CONFIG_ERROR when bz_config_ok() fails, BZ_PARAM_ERROR
+   for a bad argument, BZ_MEM_ERROR when the allocation fails, and
+   BZ_OK on success. */
+int BZ_API(BZ2_bzDecompressInit) 
+                     ( bz_stream* strm, 
+                       int        verbosity,
+                       int        small )
+{
+   DState* ds;
+
+   if (!bz_config_ok()) return BZ_CONFIG_ERROR;
+
+   if (strm == NULL ||
+       (small != 0 && small != 1) ||
+       verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR;
+
+   if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;
+   if (strm->bzfree  == NULL) strm->bzfree  = default_bzfree;
+
+   ds = BZALLOC( sizeof(DState) );
+   if (ds == NULL) return BZ_MEM_ERROR;
+
+   /* link stream and state to each other */
+   ds->strm    = strm;
+   strm->state = ds;
+
+   /* decoder starts at the first magic byte with an empty bit buffer */
+   ds->state                 = BZ_X_MAGIC_1;
+   ds->bsLive                = 0;
+   ds->bsBuff                = 0;
+   ds->calculatedCombinedCRC = 0;
+   ds->currBlockNo           = 0;
+
+   /* byte counters of the stream */
+   strm->total_in_lo32  = 0;
+   strm->total_in_hi32  = 0;
+   strm->total_out_lo32 = 0;
+   strm->total_out_hi32 = 0;
+
+   /* options; the work arrays start out unallocated */
+   ds->smallDecompress = (Bool)small;
+   ds->verbosity       = verbosity;
+   ds->ll4             = NULL;
+   ds->ll16            = NULL;
+   ds->tt              = NULL;
+
+   return BZ_OK;
+}
+
+
+/*---------------------------------------------------*/
+/* Undo the run-length coding of the current block and write the
+   result to s->strm->next_out, updating s->calculatedBlockCRC for
+   every byte written.  Symbols are fetched with BZ_GET_FAST (or
+   BZ_GET_FAST_C on the non-randomised path).
+
+   Return  True iff data corruption is discovered.
+   Returns False if there is no problem: either the output buffer is
+   full or the whole block has been consumed.
+
+   Decoding state survives between calls in s: state_out_ch and
+   state_out_len describe the run being emitted, k0 is the symbol
+   already fetched for the next run, and nblock_used counts fetched
+   symbols (the block is exhausted when it equals save_nblock+1).
+
+   Run format as decoded below: one to three equal symbols stand for
+   themselves; a fourth equal symbol is followed by a count byte and
+   the run length is count+4.
+*/
+static
+Bool unRLE_obuf_to_output_FAST ( DState* s )
+{
+   UChar k1;
+
+   if (s->blockRandomised) {
+
+      while (True) {
+         /* try to finish existing run: emit state_out_ch until the run or the output room ends */
+         while (True) {
+            if (s->strm->avail_out == 0) return False;
+            if (s->state_out_len == 0) break;
+            *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
+            BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
+            s->state_out_len--;
+            s->strm->next_out++;
+            s->strm->avail_out--;
+            s->strm->total_out_lo32++;
+            if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+         }
+
+         /* can a new run be started? (no: every symbol of the block has been fetched) */
+         if (s->nblock_used == s->save_nblock+1) return False;
+               
+         /* Only caused by corrupt data stream? */
+         if (s->nblock_used > s->save_nblock+1)
+            return True;
+   
+         s->state_out_len = 1;
+         s->state_out_ch = s->k0;
+         BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;             /* fetched symbol is XORed with the current mask */
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };       /* different symbol: run of 1 */
+   
+         s->state_out_len = 2;
+         BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };       /* run of 2 */
+   
+         s->state_out_len = 3;
+         BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };       /* run of 3 */
+   
+         BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         s->state_out_len = ((Int32)k1) + 4;               /* four equal symbols seen: k1 is the count byte */
+         BZ_GET_FAST(s->k0); BZ_RAND_UPD_MASK; 
+         s->k0 ^= BZ_RAND_MASK; s->nblock_used++;          /* pre-fetch the first symbol of the next run */
+      }
+
+   } else {
+
+      /* restore: work on local copies of the decoder state; they are written back at return_notr */
+      UInt32        c_calculatedBlockCRC = s->calculatedBlockCRC;
+      UChar         c_state_out_ch       = s->state_out_ch;
+      Int32         c_state_out_len      = s->state_out_len;
+      Int32         c_nblock_used        = s->nblock_used;
+      Int32         c_k0                 = s->k0;
+      UInt32*       c_tt                 = s->tt;
+      UInt32        c_tPos               = s->tPos;
+      char*         cs_next_out          = s->strm->next_out;
+      unsigned int  cs_avail_out         = s->strm->avail_out;
+      Int32         ro_blockSize100k     = s->blockSize100k;   /* not referenced directly here; presumably read inside BZ_GET_FAST_C -- confirm */
+      /* end restore */
+
+      UInt32       avail_out_INIT = cs_avail_out;              /* output room at entry, used to count bytes written */
+      Int32        s_save_nblockPP = s->save_nblock+1;
+      unsigned int total_out_lo32_old;
+
+      while (True) {
+
+         /* try to finish existing run: all but the last byte in the loop, the last byte at the label */
+         if (c_state_out_len > 0) {
+            while (True) {
+               if (cs_avail_out == 0) goto return_notr;
+               if (c_state_out_len == 1) break;
+               *( (UChar*)(cs_next_out) ) = c_state_out_ch;
+               BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch );
+               c_state_out_len--;
+               cs_next_out++;
+               cs_avail_out--;
+            }
+            s_state_out_len_eq_one:
+            {
+               if (cs_avail_out == 0) { 
+                  c_state_out_len = 1; goto return_notr;       /* remember that one byte is still owed */
+               };
+               *( (UChar*)(cs_next_out) ) = c_state_out_ch;
+               BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch );
+               cs_next_out++;
+               cs_avail_out--;
+            }
+         }   
+         /* Only caused by corrupt data stream?  NB: returns without writing the local copies back to s. */
+         if (c_nblock_used > s_save_nblockPP)
+            return True;
+
+         /* can a new run be started? (no: every symbol of the block has been fetched) */
+         if (c_nblock_used == s_save_nblockPP) {
+            c_state_out_len = 0; goto return_notr;
+         };   
+         c_state_out_ch = c_k0;
+         BZ_GET_FAST_C(k1); c_nblock_used++;
+         if (k1 != c_k0) { 
+            c_k0 = k1; goto s_state_out_len_eq_one;            /* run of 1: emit it straight away */
+         };
+         if (c_nblock_used == s_save_nblockPP) 
+            goto s_state_out_len_eq_one;
+   
+         c_state_out_len = 2;
+         BZ_GET_FAST_C(k1); c_nblock_used++;
+         if (c_nblock_used == s_save_nblockPP) continue;
+         if (k1 != c_k0) { c_k0 = k1; continue; };             /* run of 2 */
+   
+         c_state_out_len = 3;
+         BZ_GET_FAST_C(k1); c_nblock_used++;
+         if (c_nblock_used == s_save_nblockPP) continue;
+         if (k1 != c_k0) { c_k0 = k1; continue; };             /* run of 3 */
+   
+         BZ_GET_FAST_C(k1); c_nblock_used++;
+         c_state_out_len = ((Int32)k1) + 4;                    /* four equal symbols seen: k1 is the count byte */
+         BZ_GET_FAST_C(c_k0); c_nblock_used++;                 /* pre-fetch the first symbol of the next run */
+      }
+
+      return_notr:
+      /* add the bytes written in this call to the two-word total, carrying into the high word on wrap */
+      total_out_lo32_old = s->strm->total_out_lo32;
+      s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out);
+      if (s->strm->total_out_lo32 < total_out_lo32_old)
+         s->strm->total_out_hi32++;
+
+      /* save: write the local copies back into the decoder state */
+      s->calculatedBlockCRC = c_calculatedBlockCRC;
+      s->state_out_ch       = c_state_out_ch;
+      s->state_out_len      = c_state_out_len;
+      s->nblock_used        = c_nblock_used;
+      s->k0                 = c_k0;
+      s->tt                 = c_tt;
+      s->tPos               = c_tPos;
+      s->strm->next_out     = cs_next_out;
+      s->strm->avail_out    = cs_avail_out;
+      /* end save */
+   }
+   return False;
+}
+
+
+
+/*---------------------------------------------------*/
+/* Bisect the index range [0,256] of cftab: starting from lo = 0 and
+   hi = 256, the midpoint replaces lo when indx >= cftab[mid] and hi
+   otherwise, until the two are adjacent; lo is returned.  Only
+   entries 1..255 of cftab are ever read.  The result is meaningful
+   as "the slot containing indx" only if cftab is non-decreasing --
+   assumption, confirm against the callers. */
+__inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab )
+{
+   Int32 lo = 0;
+   Int32 hi = 256;
+
+   for (;;) {
+      Int32 probe = (lo + hi) >> 1;
+      if (indx >= cftab[probe]) {
+         lo = probe;
+      } else {
+         hi = probe;
+      }
+      if (hi - lo == 1) break;
+   }
+   return lo;
+}
+
+
+/*---------------------------------------------------*/
+/* Same job as unRLE_obuf_to_output_FAST, but symbols are fetched
+   with BZ_GET_SMALL: undo the run-length coding of the current block,
+   write the result to s->strm->next_out and update
+   s->calculatedBlockCRC for every byte written.
+
+   Return  True iff data corruption is discovered.
+   Returns False if there is no problem: either the output buffer is
+   full or the whole block has been consumed.
+
+   Both branches work directly on the fields of s.  They differ only
+   in that the randomised branch steps BZ_RAND_UPD_MASK and XORs each
+   fetched symbol with BZ_RAND_MASK.  Every exit is a return from
+   inside one of the two loops.
+*/
+static
+Bool unRLE_obuf_to_output_SMALL ( DState* s )
+{
+   UChar k1;
+
+   if (s->blockRandomised) {
+
+      while (True) {
+         /* try to finish existing run: emit state_out_ch until the run or the output room ends */
+         while (True) {
+            if (s->strm->avail_out == 0) return False;
+            if (s->state_out_len == 0) break;
+            *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
+            BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
+            s->state_out_len--;
+            s->strm->next_out++;
+            s->strm->avail_out--;
+            s->strm->total_out_lo32++;
+            if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+         }
+   
+         /* can a new run be started? (no: every symbol of the block has been fetched) */
+         if (s->nblock_used == s->save_nblock+1) return False;
+
+         /* Only caused by corrupt data stream? */
+         if (s->nblock_used > s->save_nblock+1)
+            return True;
+   
+         s->state_out_len = 1;
+         s->state_out_ch = s->k0;
+         BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;             /* fetched symbol is XORed with the current mask */
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };       /* different symbol: run of 1 */
+   
+         s->state_out_len = 2;
+         BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };       /* run of 2 */
+   
+         s->state_out_len = 3;
+         BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };       /* run of 3 */
+   
+         BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         s->state_out_len = ((Int32)k1) + 4;               /* four equal symbols seen: k1 is the count byte */
+         BZ_GET_SMALL(s->k0); BZ_RAND_UPD_MASK; 
+         s->k0 ^= BZ_RAND_MASK; s->nblock_used++;          /* pre-fetch the first symbol of the next run */
+      }
+
+   } else {
+
+      while (True) {
+         /* try to finish existing run: emit state_out_ch until the run or the output room ends */
+         while (True) {
+            if (s->strm->avail_out == 0) return False;
+            if (s->state_out_len == 0) break;
+            *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
+            BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
+            s->state_out_len--;
+            s->strm->next_out++;
+            s->strm->avail_out--;
+            s->strm->total_out_lo32++;
+            if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+         }
+   
+         /* can a new run be started? (no: every symbol of the block has been fetched) */
+         if (s->nblock_used == s->save_nblock+1) return False;
+
+         /* Only caused by corrupt data stream? */
+         if (s->nblock_used > s->save_nblock+1)
+            return True;
+   
+         s->state_out_len = 1;
+         s->state_out_ch = s->k0;
+         BZ_GET_SMALL(k1); s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };       /* different symbol: run of 1 */
+   
+         s->state_out_len = 2;
+         BZ_GET_SMALL(k1); s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };       /* run of 2 */
+   
+         s->state_out_len = 3;
+         BZ_GET_SMALL(k1); s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };       /* run of 3 */
+   
+         BZ_GET_SMALL(k1); s->nblock_used++;
+         s->state_out_len = ((Int32)k1) + 4;               /* four equal symbols seen: k1 is the count byte */
+         BZ_GET_SMALL(s->k0); s->nblock_used++;            /* pre-fetch the first symbol of the next run */
+      }
+
+   }
+}
+
+
+/*---------------------------------------------------
+   Public entry point: advance decompression on strm.
+
+   The loop alternates between two activities:
+     - state BZ_X_OUTPUT: decoded block data is written out by
+       unRLE_obuf_to_output_SMALL or _FAST.  When the block has been
+       written completely its CRC is finalised and compared with the
+       stored one, folded into the combined CRC, and the state moves
+       to BZ_X_BLKHDR_1.
+     - state >= BZ_X_MAGIC_1: BZ2_decompress() parses more input.
+
+   Returns BZ_PARAM_ERROR for a NULL or inconsistent stream,
+   BZ_SEQUENCE_ERROR when the state is BZ_X_IDLE, BZ_DATA_ERROR when
+   corruption or a CRC mismatch is detected, BZ_OK when output is
+   incomplete for now, BZ_STREAM_END when BZ2_decompress() reports
+   the end and the combined CRC matches, and otherwise whatever
+   BZ2_decompress() returned.
+  ---------------------------------------------------*/
+int BZ_API(BZ2_bzDecompress) ( bz_stream *strm )
+{
+   Bool    corrupt;
+   DState* s;
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   s = strm->state;
+   if (s == NULL) return BZ_PARAM_ERROR;
+   if (s->strm != strm) return BZ_PARAM_ERROR;   /* state must belong to this stream */
+
+   while (True) {
+      if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR;
+      if (s->state == BZ_X_OUTPUT) {
+         if (s->smallDecompress)
+            corrupt = unRLE_obuf_to_output_SMALL ( s ); else
+            corrupt = unRLE_obuf_to_output_FAST  ( s );
+         if (corrupt) return BZ_DATA_ERROR;
+         if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) {   /* block fully written out */
+            BZ_FINALISE_CRC ( s->calculatedBlockCRC );
+            if (s->verbosity >= 3) 
+               VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC, 
+                          s->calculatedBlockCRC );
+            if (s->verbosity >= 2) VPrintf0 ( "]" );
+            if (s->calculatedBlockCRC != s->storedBlockCRC)
+               return BZ_DATA_ERROR;
+            s->calculatedCombinedCRC                                          /* rotate left by one bit ... */
+               = (s->calculatedCombinedCRC << 1) | 
+                    (s->calculatedCombinedCRC >> 31);
+            s->calculatedCombinedCRC ^= s->calculatedBlockCRC;                /* ... then XOR in this block's CRC */
+            s->state = BZ_X_BLKHDR_1;
+         } else {
+            return BZ_OK;                                                     /* block not finished in this call */
+         }
+      }
+      if (s->state >= BZ_X_MAGIC_1) {
+         Int32 r = BZ2_decompress ( s );
+         if (r == BZ_STREAM_END) {
+            if (s->verbosity >= 3)
+               VPrintf2 ( "\n    combined CRCs: stored = 0x%08x, computed = 0x%08x", 
+                          s->storedCombinedCRC, s->calculatedCombinedCRC );
+            if (s->calculatedCombinedCRC != s->storedCombinedCRC)
+               return BZ_DATA_ERROR;
+            return r;
+         }
+         if (s->state != BZ_X_OUTPUT) return r;   /* otherwise loop round to write the block out */
+      }
+   }
+
+   AssertH ( 0, 6001 );
+
+   return 0;  /*NOTREACHED*/
+}
+
+
+/*---------------------------------------------------*/
+/* Release everything owned by a decompression stream: the tt, ll16
+   and ll4 arrays (when allocated) and then the state itself, all
+   through the stream's bzfree hook.  strm->state is cleared.
+   Returns BZ_PARAM_ERROR when strm or its state is NULL or the state
+   does not point back at strm; BZ_OK otherwise. */
+int BZ_API(BZ2_bzDecompressEnd)  ( bz_stream *strm )
+{
+   DState* ds;
+
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   ds = strm->state;
+   if (ds == NULL || ds->strm != strm) return BZ_PARAM_ERROR;
+
+   if (ds->tt   != NULL) { BZFREE(ds->tt);   }
+   if (ds->ll16 != NULL) { BZFREE(ds->ll16); }
+   if (ds->ll4  != NULL) { BZFREE(ds->ll4);  }
+
+   BZFREE(strm->state);
+   strm->state = NULL;
+
+   return BZ_OK;
+}
+
+
+#ifndef BZ_NO_STDIO
+/*---------------------------------------------------*/
+/*--- File I/O stuff                              ---*/
+/*---------------------------------------------------*/
+
+#define BZ_SETERR(eee)   /* record status eee; needs bzerror and bzf in scope */ \
+{                                         \
+   if (bzerror != NULL) *bzerror = eee;   /* caller's status slot, optional */ \
+   if (bzf != NULL) bzf->lastErr = eee;   /* also remembered on the handle */  \
+}
+
+typedef   /* state behind a BZFILE* handle used by the BZ2_bzRead / BZ2_bzWrite family */
+   struct {
+      FILE*     handle;               /* stdio stream given to BZ2_bzReadOpen / BZ2_bzWriteOpen */
+      Char      buf[BZ_MAX_UNUSED];   /* staging buffer: compressed bytes before fwrite or after fread */
+      Int32     bufN;                 /* bytes placed in buf on the read side; set to 0 by both open calls */
+      Bool      writing;              /* True for a write handle, False for a read handle */
+      bz_stream strm;                 /* the underlying compression / decompression stream */
+      Int32     lastErr;              /* last status stored by BZ_SETERR */
+      Bool      initialisedOk;        /* set True once the stream was initialised successfully */
+   }
+   bzFile;
+
+
+/*---------------------------------------------*/
+/* Report whether f has no more bytes to deliver.  One byte is read
+   with fgetc(); EOF from that read yields True, otherwise the byte is
+   pushed back with ungetc() and False is returned. */
+static Bool myfeof ( FILE* f )
+{
+   Int32 peeked = fgetc ( f );
+
+   if (peeked != EOF) {
+      ungetc ( peeked, f );
+      return False;
+   }
+   return True;
+}
+
+
+/*---------------------------------------------------*/
+/* Create a write handle that compresses onto the stdio stream f.
+   blockSize100k must be 1..9, workFactor 0..250 (0 selects 30) and
+   verbosity 0..4.  On success the handle is returned and BZ_OK is
+   stored through bzerror (when non-NULL).  On failure NULL is
+   returned and the status is BZ_PARAM_ERROR (bad argument),
+   BZ_IO_ERROR (f already has its error flag set), BZ_MEM_ERROR
+   (handle allocation failed) or the code from BZ2_bzCompressInit. */
+BZFILE* BZ_API(BZ2_bzWriteOpen) 
+                    ( int*  bzerror,      
+                      FILE* f, 
+                      int   blockSize100k, 
+                      int   verbosity,
+                      int   workFactor )
+{
+   bzFile* bzf = NULL;
+   Int32   initStatus;
+
+   BZ_SETERR(BZ_OK);
+
+   /* argument validation */
+   if (f == NULL)
+      { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
+   if (blockSize100k < 1 || blockSize100k > 9)
+      { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
+   if (workFactor < 0 || workFactor > 250)
+      { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
+   if (verbosity < 0 || verbosity > 4)
+      { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
+
+   if (ferror(f))
+      { BZ_SETERR(BZ_IO_ERROR); return NULL; };
+
+   bzf = malloc ( sizeof(bzFile) );
+   if (bzf == NULL)
+      { BZ_SETERR(BZ_MEM_ERROR); return NULL; };
+
+   /* bzf exists now, so this also initialises bzf->lastErr */
+   BZ_SETERR(BZ_OK);
+
+   bzf->handle        = f;
+   bzf->writing       = True;
+   bzf->bufN          = 0;
+   bzf->initialisedOk = False;
+   bzf->strm.opaque   = NULL;
+   bzf->strm.bzalloc  = NULL;
+   bzf->strm.bzfree   = NULL;
+
+   if (workFactor == 0) workFactor = 30;
+   initStatus = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k, 
+                                     verbosity, workFactor );
+   if (initStatus != BZ_OK) {
+      BZ_SETERR(initStatus);
+      free(bzf);
+      return NULL;
+   }
+
+   bzf->strm.avail_in = 0;
+   bzf->initialisedOk = True;
+   return bzf;   
+}
+
+
+
+/*---------------------------------------------------*/
+/* Compress len bytes from buf and write the compressed output to the
+   handle's stdio stream.  The status is stored through bzerror (when
+   non-NULL) and on the handle:
+     BZ_PARAM_ERROR    - b or buf is NULL, or len is negative;
+     BZ_SEQUENCE_ERROR - b is not a write handle;
+     BZ_IO_ERROR       - the stream has its error flag set, or a
+                         write came up short;
+     any status other than BZ_RUN_OK returned by BZ2_bzCompress;
+     BZ_OK             - all input consumed (also for len == 0). */
+void BZ_API(BZ2_bzWrite)
+             ( int*    bzerror, 
+               BZFILE* b, 
+               void*   buf, 
+               int     len )
+{
+   bzFile* bzf = (bzFile*)b;
+   Int32   pending, written, status;
+
+   BZ_SETERR(BZ_OK);
+   if (bzf == NULL || buf == NULL || len < 0)
+      { BZ_SETERR(BZ_PARAM_ERROR); return; };
+   if (!(bzf->writing))
+      { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
+   if (ferror(bzf->handle))
+      { BZ_SETERR(BZ_IO_ERROR); return; };
+
+   if (len == 0)
+      { BZ_SETERR(BZ_OK); return; };
+
+   bzf->strm.avail_in = len;
+   bzf->strm.next_in  = buf;
+
+   /* compress into the staging buffer and drain it to the stream
+      until the compressor has taken all of the caller's input */
+   do {
+      bzf->strm.avail_out = BZ_MAX_UNUSED;
+      bzf->strm.next_out = bzf->buf;
+
+      status = BZ2_bzCompress ( &(bzf->strm), BZ_RUN );
+      if (status != BZ_RUN_OK)
+         { BZ_SETERR(status); return; };
+
+      if (bzf->strm.avail_out < BZ_MAX_UNUSED) {
+         pending = BZ_MAX_UNUSED - bzf->strm.avail_out;
+         written = fwrite ( (void*)(bzf->buf), sizeof(UChar), 
+                            pending, bzf->handle );
+         if (pending != written || ferror(bzf->handle))
+            { BZ_SETERR(BZ_IO_ERROR); return; };
+      }
+   } while (bzf->strm.avail_in != 0);
+
+   BZ_SETERR(BZ_OK);
+}
+
+
+/*---------------------------------------------------*/
+/* 32-bit convenience form of BZ2_bzWriteClose64: forwards every
+   argument and passes NULL for the two high-word counters, so only
+   the low 32 bits of the byte totals are reported. */
+void BZ_API(BZ2_bzWriteClose)
+                  ( int*          bzerror, 
+                    BZFILE*       b, 
+                    int           abandon,
+                    unsigned int* nbytes_in,
+                    unsigned int* nbytes_out )
+{
+   unsigned int* const noInHi32  = NULL;
+   unsigned int* const noOutHi32 = NULL;
+
+   BZ2_bzWriteClose64 ( bzerror, b, abandon, 
+                        nbytes_in,  noInHi32,
+                        nbytes_out, noOutHi32 );
+}
+
+
+void BZ_API(BZ2_bzWriteClose64)
+                  ( int*          bzerror,           /* out, may be NULL: status code */
+                    BZFILE*       b,                 /* write handle; NULL is accepted */
+                    int           abandon,           /* nonzero: do not finish or flush */
+                    unsigned int* nbytes_in_lo32,    /* out, may be NULL: input total, low word */
+                    unsigned int* nbytes_in_hi32,    /* out, may be NULL: input total, high word */
+                    unsigned int* nbytes_out_lo32,   /* out, may be NULL: output total, low word */
+                    unsigned int* nbytes_out_hi32 )  /* out, may be NULL: output total, high word */
+{
+   Int32   n, n2, ret;
+   bzFile* bzf = (bzFile*)b;
+   /* Close a write handle.  Unless abandon is set (or the handle
+      already carries an error) the compressed stream is finished and
+      written out, the stdio stream is flushed, the byte totals are
+      reported, and the handle is released.
+      NB: every error return below leaves the handle allocated. */
+   if (bzf == NULL)
+      { BZ_SETERR(BZ_OK); return; };
+   if (!(bzf->writing))
+      { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
+   if (ferror(bzf->handle))                       /* checked even when abandon is set */
+      { BZ_SETERR(BZ_IO_ERROR); return; };
+
+   if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0;
+   if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0;
+   if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0;
+   if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0;
+
+   if ((!abandon) && bzf->lastErr == BZ_OK) {
+      while (True) {                              /* drain the compressor until it reports BZ_STREAM_END */
+         bzf->strm.avail_out = BZ_MAX_UNUSED;
+         bzf->strm.next_out = bzf->buf;
+         ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH );
+         if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
+            { BZ_SETERR(ret); return; };
+
+         if (bzf->strm.avail_out < BZ_MAX_UNUSED) {
+            n = BZ_MAX_UNUSED - bzf->strm.avail_out;   /* bytes produced in this round */
+            n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), 
+                          n, bzf->handle );
+            if (n != n2 || ferror(bzf->handle))
+               { BZ_SETERR(BZ_IO_ERROR); return; };
+         }
+
+         if (ret == BZ_STREAM_END) break;
+      }
+   }
+
+   if ( !abandon && !ferror ( bzf->handle ) ) {
+      fflush ( bzf->handle );
+      if (ferror(bzf->handle))
+         { BZ_SETERR(BZ_IO_ERROR); return; };
+   }
+
+   if (nbytes_in_lo32 != NULL)
+      *nbytes_in_lo32 = bzf->strm.total_in_lo32;
+   if (nbytes_in_hi32 != NULL)
+      *nbytes_in_hi32 = bzf->strm.total_in_hi32;
+   if (nbytes_out_lo32 != NULL)
+      *nbytes_out_lo32 = bzf->strm.total_out_lo32;
+   if (nbytes_out_hi32 != NULL)
+      *nbytes_out_hi32 = bzf->strm.total_out_hi32;
+
+   BZ_SETERR(BZ_OK);                              /* must precede free(): the macro writes bzf->lastErr */
+   BZ2_bzCompressEnd ( &(bzf->strm) );
+   free ( bzf );
+}
+
+
+/*---------------------------------------------------*/
+/* Create a read handle that decompresses from the stdio stream f.
+   The nUnused bytes at unused (at most BZ_MAX_UNUSED) are copied into
+   the handle's buffer and become the first input given to the
+   decompressor.  small must be 0 or 1 and verbosity 0..4; unused may
+   be NULL only when nUnused is 0.  On success the handle is returned
+   and BZ_OK stored through bzerror (when non-NULL).  On failure NULL
+   is returned and the status is BZ_PARAM_ERROR, BZ_IO_ERROR (f has
+   its error flag set), BZ_MEM_ERROR, or the code from
+   BZ2_bzDecompressInit. */
+BZFILE* BZ_API(BZ2_bzReadOpen) 
+                   ( int*  bzerror, 
+                     FILE* f, 
+                     int   verbosity,
+                     int   small,
+                     void* unused,
+                     int   nUnused )
+{
+   bzFile*      bzf   = NULL;
+   const UChar* carry = (const UChar*)unused;
+   int          initStatus;
+
+   BZ_SETERR(BZ_OK);
+
+   /* argument validation */
+   if (f == NULL)
+      { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
+   if (small != 0 && small != 1)
+      { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
+   if (verbosity < 0 || verbosity > 4)
+      { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
+   if (unused == NULL ? (nUnused != 0)
+                      : (nUnused < 0 || nUnused > BZ_MAX_UNUSED))
+      { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
+
+   if (ferror(f))
+      { BZ_SETERR(BZ_IO_ERROR); return NULL; };
+
+   bzf = malloc ( sizeof(bzFile) );
+   if (bzf == NULL) 
+      { BZ_SETERR(BZ_MEM_ERROR); return NULL; };
+
+   /* bzf exists now, so this also initialises bzf->lastErr */
+   BZ_SETERR(BZ_OK);
+
+   bzf->handle        = f;
+   bzf->writing       = False;
+   bzf->bufN          = 0;
+   bzf->initialisedOk = False;
+   bzf->strm.opaque   = NULL;
+   bzf->strm.bzalloc  = NULL;
+   bzf->strm.bzfree   = NULL;
+
+   /* stage the caller's leftover bytes at the front of the buffer */
+   while (bzf->bufN < nUnused) {
+      bzf->buf[bzf->bufN] = carry[bzf->bufN];
+      bzf->bufN++;
+   }
+
+   initStatus = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small );
+   if (initStatus != BZ_OK) {
+      BZ_SETERR(initStatus);
+      free(bzf);
+      return NULL;
+   }
+
+   bzf->strm.next_in  = bzf->buf;
+   bzf->strm.avail_in = bzf->bufN;
+
+   bzf->initialisedOk = True;
+   return bzf;   
+}
+
+
+/*---------------------------------------------------*/
+/* Release a read handle.  A NULL handle is accepted and reports
+   BZ_OK.  A write handle is rejected with BZ_SEQUENCE_ERROR and left
+   untouched.  Otherwise the decompression stream is torn down (if it
+   was initialised) and the handle is freed; the status stays BZ_OK. */
+void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b )
+{
+   bzFile* bzf = (bzFile*)b;
+
+   BZ_SETERR(BZ_OK);
+
+   if (bzf == NULL) {
+      BZ_SETERR(BZ_OK);
+   }
+   else if (bzf->writing) {
+      BZ_SETERR(BZ_SEQUENCE_ERROR);
+   }
+   else {
+      if (bzf->initialisedOk) {
+         (void)BZ2_bzDecompressEnd ( &(bzf->strm) );
+      }
+      free ( bzf );
+   }
+}
+
+
+/*---------------------------------------------------
+   Decompress up to len bytes from the handle into buf.
+
+   Return value and status (stored through bzerror when non-NULL,
+   and on the handle):
+     len              with BZ_OK           - buf was filled;
+     len - avail_out  with BZ_STREAM_END   - the compressed stream
+                                             ended first;
+     0                with BZ_OK           - len was 0;
+     0                with an error status - BZ_PARAM_ERROR (NULL
+                        argument or negative len), BZ_SEQUENCE_ERROR
+                        (write handle), BZ_IO_ERROR (stdio error),
+                        BZ_UNEXPECTED_EOF, or any other code returned
+                        by BZ2_bzDecompress.
+
+   The handle's buffer is refilled with fread() whenever the
+   decompressor has no input left and the file is not at EOF.
+  ---------------------------------------------------*/
+int BZ_API(BZ2_bzRead) 
+           ( int*    bzerror, 
+             BZFILE* b, 
+             void*   buf, 
+             int     len )
+{
+   Int32   n, ret;
+   bzFile* bzf = (bzFile*)b;
+
+   BZ_SETERR(BZ_OK);
+
+   if (bzf == NULL || buf == NULL || len < 0)
+      { BZ_SETERR(BZ_PARAM_ERROR); return 0; };
+
+   if (bzf->writing)
+      { BZ_SETERR(BZ_SEQUENCE_ERROR); return 0; };
+
+   if (len == 0)
+      { BZ_SETERR(BZ_OK); return 0; };
+
+   bzf->strm.avail_out = len;
+   bzf->strm.next_out = buf;
+
+   while (True) {
+
+      if (ferror(bzf->handle)) 
+         { BZ_SETERR(BZ_IO_ERROR); return 0; };
+
+      if (bzf->strm.avail_in == 0 && !myfeof(bzf->handle)) {   /* input buffer empty and file has more */
+         n = fread ( bzf->buf, sizeof(UChar), 
+                     BZ_MAX_UNUSED, bzf->handle );
+         if (ferror(bzf->handle))
+            { BZ_SETERR(BZ_IO_ERROR); return 0; };
+         bzf->bufN = n;
+         bzf->strm.avail_in = bzf->bufN;
+         bzf->strm.next_in = bzf->buf;
+      }
+
+      ret = BZ2_bzDecompress ( &(bzf->strm) );
+
+      if (ret != BZ_OK && ret != BZ_STREAM_END)
+         { BZ_SETERR(ret); return 0; };
+
+      if (ret == BZ_OK && myfeof(bzf->handle) &&               /* file and buffer exhausted, output still wanted */
+          bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0)
+         { BZ_SETERR(BZ_UNEXPECTED_EOF); return 0; };
+
+      if (ret == BZ_STREAM_END)
+         { BZ_SETERR(BZ_STREAM_END);
+           return len - bzf->strm.avail_out; };                /* bytes actually delivered */
+      if (bzf->strm.avail_out == 0)
+         { BZ_SETERR(BZ_OK); return len; };
+      
+   }
+
+   return 0; /*not reached*/
+}
+
+
+/*---------------------------------------------------*/
+/* After a read handle has reported BZ_STREAM_END, hand back the input
+   bytes the decompressor did not consume: *unused receives the
+   stream's next_in pointer and *nUnused its avail_in count.
+   Status: BZ_PARAM_ERROR when b is NULL, BZ_SEQUENCE_ERROR when the
+   handle's last status is not BZ_STREAM_END, BZ_PARAM_ERROR when
+   unused or nUnused is NULL, BZ_OK otherwise.  The checks are made
+   in that order. */
+void BZ_API(BZ2_bzReadGetUnused) 
+                     ( int*    bzerror, 
+                       BZFILE* b, 
+                       void**  unused, 
+                       int*    nUnused )
+{
+   bzFile* bzf = (bzFile*)b;
+
+   if (bzf == NULL) {
+      BZ_SETERR(BZ_PARAM_ERROR);
+   }
+   else if (bzf->lastErr != BZ_STREAM_END) {
+      BZ_SETERR(BZ_SEQUENCE_ERROR);
+   }
+   else if (unused == NULL || nUnused == NULL) {
+      BZ_SETERR(BZ_PARAM_ERROR);
+   }
+   else {
+      BZ_SETERR(BZ_OK);
+      *nUnused = bzf->strm.avail_in;
+      *unused = bzf->strm.next_in;
+   }
+}
+#endif
+
+
+/*---------------------------------------------------*/
+/*--- Misc convenience stuff                      ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+/* Compress sourceLen bytes at source into dest in a single call.
+   On entry *destLen is the capacity of dest; on success it is
+   replaced by the number of bytes produced.  blockSize100k must be
+   1..9, verbosity 0..4 and workFactor 0..250 (0 selects 30).
+   Returns BZ_PARAM_ERROR for a bad argument, the code from
+   BZ2_bzCompressInit if that fails, BZ_OUTBUFF_FULL when dest is too
+   small (BZ2_bzCompress returned BZ_FINISH_OK), BZ_OK when the
+   stream ended, and otherwise whatever BZ2_bzCompress returned.
+   The stream is always torn down before returning. */
+int BZ_API(BZ2_bzBuffToBuffCompress) 
+                         ( char*         dest, 
+                           unsigned int* destLen,
+                           char*         source, 
+                           unsigned int  sourceLen,
+                           int           blockSize100k, 
+                           int           verbosity, 
+                           int           workFactor )
+{
+   bz_stream strm;
+   int status;
+   int result;
+
+   if (dest == NULL || destLen == NULL || 
+       source == NULL ||
+       blockSize100k < 1 || blockSize100k > 9 ||
+       verbosity < 0 || verbosity > 4 ||
+       workFactor < 0 || workFactor > 250) 
+      return BZ_PARAM_ERROR;
+
+   if (workFactor == 0) workFactor = 30;
+
+   strm.opaque  = NULL;
+   strm.bzalloc = NULL;
+   strm.bzfree  = NULL;
+   status = BZ2_bzCompressInit ( &strm, blockSize100k, 
+                                 verbosity, workFactor );
+   if (status != BZ_OK) return status;
+
+   strm.next_in   = source;
+   strm.avail_in  = sourceLen;
+   strm.next_out  = dest;
+   strm.avail_out = *destLen;
+
+   status = BZ2_bzCompress ( &strm, BZ_FINISH );
+
+   if (status == BZ_FINISH_OK) {
+      /* more output pending than dest can hold */
+      result = BZ_OUTBUFF_FULL;
+   }
+   else if (status != BZ_STREAM_END) {
+      /* any other status is passed through */
+      result = status;
+   }
+   else {
+      /* normal termination */
+      *destLen -= strm.avail_out;   
+      result = BZ_OK;
+   }
+
+   BZ2_bzCompressEnd ( &strm );
+   return result;
+}
+
+
+/*---------------------------------------------------*/
+/* Decompress sourceLen bytes at source into dest in a single call.
+   On entry *destLen is the capacity of dest; on success it is
+   replaced by the number of bytes produced.  small must be 0 or 1
+   and verbosity 0..4.
+   Returns BZ_PARAM_ERROR for a bad argument, the code from
+   BZ2_bzDecompressInit if that fails, BZ_OK when the stream ended,
+   and otherwise whatever BZ2_bzDecompress returned -- except that a
+   BZ_OK from it is reported as BZ_UNEXPECTED_EOF when output room
+   remains and BZ_OUTBUFF_FULL when it does not.
+   The stream is always torn down before returning. */
+int BZ_API(BZ2_bzBuffToBuffDecompress) 
+                           ( char*         dest, 
+                             unsigned int* destLen,
+                             char*         source, 
+                             unsigned int  sourceLen,
+                             int           small,
+                             int           verbosity )
+{
+   bz_stream strm;
+   int status;
+   int result;
+
+   if (dest == NULL || destLen == NULL || 
+       source == NULL ||
+       (small != 0 && small != 1) ||
+       verbosity < 0 || verbosity > 4) 
+          return BZ_PARAM_ERROR;
+
+   strm.opaque  = NULL;
+   strm.bzalloc = NULL;
+   strm.bzfree  = NULL;
+   status = BZ2_bzDecompressInit ( &strm, verbosity, small );
+   if (status != BZ_OK) return status;
+
+   strm.next_in   = source;
+   strm.avail_in  = sourceLen;
+   strm.next_out  = dest;
+   strm.avail_out = *destLen;
+
+   status = BZ2_bzDecompress ( &strm );
+
+   if (status == BZ_OK) {
+      /* stopped early: the input ran dry or dest filled up */
+      if (strm.avail_out > 0) {
+         result = BZ_UNEXPECTED_EOF;
+      } else {
+         result = BZ_OUTBUFF_FULL;
+      }
+   }
+   else if (status != BZ_STREAM_END) {
+      /* any other status is passed through */
+      result = status;
+   }
+   else {
+      /* normal termination */
+      *destLen -= strm.avail_out;
+      result = BZ_OK;
+   }
+
+   BZ2_bzDecompressEnd ( &strm );
+   return result; 
+}
+
+
+/*---------------------------------------------------*/
+/*--
+   Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp)
+   to support better zlib compatibility.
+   This code is not _officially_ part of libbzip2 (yet);
+   I haven't tested it, documented it, or considered the
+   threading-safeness of it.
+   If this code breaks, please contact both Yoshioka and me.
+--*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+/*--
+   Return the BZ_VERSION string, e.g. "1.0.8, 13-Jul-2019".
+--*/
+const char * BZ_API(BZ2_bzlibVersion)(void)
+{
+   return BZ_VERSION;
+}
+
+
+#ifndef BZ_NO_STDIO
+/*---------------------------------------------------*/
+
+#if defined(_WIN32) || defined(OS2) || defined(MSDOS)
+#   include <fcntl.h>
+#   include <io.h>
+#   define SET_BINARY_MODE(file) setmode(fileno(file),O_BINARY)
+#else
+#   define SET_BINARY_MODE(file)
+#endif
+static
+BZFILE * bzopen_or_bzdopen
+               ( const char *path,   /* no use when bzdopen */
+                 int fd,             /* no use when bzopen */
+                 const char *mode,
+                 int open_mode)      /* bzopen: 0, bzdopen:1 */
+{
+   /* Common worker behind BZ2_bzopen and BZ2_bzdopen.  mode is scanned
+      for 'r', 'w', 's' (small mode) and a block-size digit; any other
+      character is ignored.  Returns NULL if nothing could be opened. */
+   int    bzerr;
+   char   unused[BZ_MAX_UNUSED];
+   int    blockSize100k = 9;
+   int    writing       = 0;
+   const char *mode2;     /* stdio mode string: "rb" or "wb" */
+   FILE   *fp           = NULL;
+   BZFILE *bzfp         = NULL;
+   int    verbosity     = 0;
+   int    workFactor    = 30;
+   int    smallMode     = 0;
+   int    nUnused       = 0; 
+
+   if (mode == NULL) return NULL;
+   while (*mode) {
+      switch (*mode) {
+      case 'r': writing = 0;   break;
+      case 'w': writing = 1;   break;
+      case 's': smallMode = 1; break;
+      default:
+         /* <ctype.h> wants an unsigned char value, not a plain char */
+         if (isdigit((unsigned char)(*mode))) {
+            blockSize100k = *mode-BZ_HDR_0;
+         }
+      }
+      mode++;
+   }
+   mode2 = writing ? "wb" : "rb";   /* always binary mode */
+
+   if (open_mode==0) {
+      if (path==NULL || strcmp(path,"")==0) {
+        fp = (writing ? stdout : stdin);
+        SET_BINARY_MODE(fp);
+      } else {
+        fp = fopen(path,mode2);
+      }
+   } else {
+#ifdef BZ_STRICT_ANSI
+      fp = NULL;
+#else
+      fp = fdopen(fd,mode2);
+#endif
+   }
+   if (fp == NULL) return NULL;
+
+   if (writing) {
+      /* Guard against total chaos and anarchy -- JRS */
+      if (blockSize100k < 1) blockSize100k = 1;
+      if (blockSize100k > 9) blockSize100k = 9; 
+      bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k,
+                             verbosity,workFactor);
+   } else {
+      bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode,
+                            unused,nUnused);
+   }
+   if (bzfp == NULL) {
+      if (fp != stdin && fp != stdout) fclose(fp);
+      return NULL;
+   }
+   return bzfp;
+}
+
+
+/*---------------------------------------------------*/
+/*--
+   Open the file named by path for reading or writing, as chosen
+   by mode; e.g. BZ2_bzopen("file","w9").  If path is "" or NULL,
+   stdin or stdout is used instead.  Returns NULL on failure.
+--*/
+BZFILE * BZ_API(BZ2_bzopen)
+               ( const char *path,
+                 const char *mode )
+{
+   return bzopen_or_bzdopen(path,-1,mode,/*bzopen*/0);
+}
+
+
+/*---------------------------------------------------*/
+BZFILE * BZ_API(BZ2_bzdopen)
+               ( int fd,             /* descriptor handed to fdopen() */
+                 const char *mode )  /* same letters as for BZ2_bzopen */
+{
+   return bzopen_or_bzdopen(NULL,fd,mode,/*bzdopen*/1);
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len )
+{
+   /* Returns bytes read, 0 once the stream has ended, -1 on error. */
+   int bzerr, nread;
+   if (b == NULL) return -1;   /* no handle: fail instead of crashing */
+   /* lastErr already records BZ_STREAM_END: nothing more to read */
+   if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0;
+   nread = BZ2_bzRead(&bzerr,b,buf,len);
+   if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) return nread;
+   return -1;
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len )
+{
+   /* Hand len bytes from buf to BZ2_bzWrite.  The result is len
+      when that call reports BZ_OK and -1 for any other status. */
+   int status;
+
+   BZ2_bzWrite(&status,b,buf,len);
+   if (status != BZ_OK) return -1;
+
+   return len;
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzflush) (BZFILE *b)
+{
+   /* Currently a no-op: b is ignored and 0 is always returned. */
+   return 0;
+}
+
+
+/*---------------------------------------------------*/
+void BZ_API(BZ2_bzclose) (BZFILE* b)
+{
+   /* Close the bzip2 stream b (NULL is ignored), then fclose its
+      stdio handle unless that is stdin or stdout. */
+   bzFile *bzf = (bzFile *)b;
+   FILE   *stream;
+   int     status;
+
+   if (bzf == NULL) return;
+   stream = bzf->handle;   /* fetched first, before b is closed */
+   if (!bzf->writing) {
+      BZ2_bzReadClose(&status,b);
+   } else {
+      BZ2_bzWriteClose(&status,b,0,NULL,NULL);
+      /* a normal close failed: call again with abandon set */
+      if (status != BZ_OK) BZ2_bzWriteClose(NULL,b,1,NULL,NULL);
+   }
+   if (stream != stdin && stream != stdout) fclose(stream);
+}
+
+
+/*---------------------------------------------------*/
+/*--
+   Error names indexed by -code; BZ2_bzerror looks them up here.
+--*/
+static const char *bzerrorstrings[] = {
+       "OK"
+      ,"SEQUENCE_ERROR"
+      ,"PARAM_ERROR"
+      ,"MEM_ERROR"
+      ,"DATA_ERROR"
+      ,"DATA_ERROR_MAGIC"
+      ,"IO_ERROR"
+      ,"UNEXPECTED_EOF"
+      ,"OUTBUFF_FULL"
+      ,"CONFIG_ERROR"
+      ,"???"   /* for future */
+      ,"???"   /* for future */
+      ,"???"   /* for future */
+      ,"???"   /* for future */
+      ,"???"   /* for future */
+      ,"???"   /* for future */
+};
+
+const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum)
+{
+   /* Stores the last error of b in *errnum and returns its name. */
+   int err  = ((bzFile *)b)->lastErr;
+   int last = (int)(sizeof bzerrorstrings / sizeof bzerrorstrings[0]) - 1;
+   if(err>0) err = 0;   /* non-error statuses are reported as BZ_OK */
+   *errnum = err;
+   return bzerrorstrings[err < -last ? last : -err];  /* never past the end */
+}
+#endif
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                           bzlib.c ---*/
+/*-------------------------------------------------------------*/
diff --git a/libraries/bzip2/bzlib.h b/libraries/bzip2/bzlib.h
new file mode 100644
index 000000000..8966a6c58
--- /dev/null
+++ b/libraries/bzip2/bzlib.h
@@ -0,0 +1,282 @@
+
+/*-------------------------------------------------------------*/
+/*--- Public header file for the library.                   ---*/
+/*---                                               bzlib.h ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+   This file is part of bzip2/libbzip2, a program and library for
+   lossless, block-sorting data compression.
+
+   bzip2/libbzip2 version 1.0.8 of 13 July 2019
+   Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
+
+   Please read the WARNING, DISCLAIMER and PATENTS sections in the 
+   README file.
+
+   This program is released under the terms of the license contained
+   in the file LICENSE.
+   ------------------------------------------------------------------ */
+
+
+#ifndef _BZLIB_H
+#define _BZLIB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BZ_RUN               0
+#define BZ_FLUSH             1
+#define BZ_FINISH            2
+
+#define BZ_OK                0
+#define BZ_RUN_OK            1
+#define BZ_FLUSH_OK          2
+#define BZ_FINISH_OK         3
+#define BZ_STREAM_END        4
+#define BZ_SEQUENCE_ERROR    (-1)
+#define BZ_PARAM_ERROR       (-2)
+#define BZ_MEM_ERROR         (-3)
+#define BZ_DATA_ERROR        (-4)
+#define BZ_DATA_ERROR_MAGIC  (-5)
+#define BZ_IO_ERROR          (-6)
+#define BZ_UNEXPECTED_EOF    (-7)
+#define BZ_OUTBUFF_FULL      (-8)
+#define BZ_CONFIG_ERROR      (-9)
+
+typedef 
+   struct {
+      char *next_in;                /* where pending input starts */
+      unsigned int avail_in;        /* bytes available at next_in */
+      unsigned int total_in_lo32;   /* NOTE(review): presumably low/high */
+      unsigned int total_in_hi32;   /* halves of an input byte count */
+
+      char *next_out;               /* where output is written next */
+      unsigned int avail_out;       /* room left at next_out */
+      unsigned int total_out_lo32;  /* NOTE(review): as total_in_*, */
+      unsigned int total_out_hi32;  /* but for output - confirm */
+
+      void *state;                  /* NOTE(review): looks library-private */
+
+      void *(*bzalloc)(void *,int,int);  /* BZALLOC: bzalloc(opaque,n,1) */
+      void (*bzfree)(void *,void *);     /* BZFREE: bzfree(opaque,ptr) */
+      void *opaque;                      /* passed as first arg to both */
+   } 
+   bz_stream;
+
+
+#ifndef BZ_IMPORT
+#define BZ_EXPORT
+#endif
+
+#ifndef BZ_NO_STDIO
+/* Need a definition for FILE */
+#include <stdio.h>
+#endif
+
+#ifdef _WIN32
+#   include <windows.h>
+#   ifdef small
+      /* windows.h defines small as char */
+#      undef small
+#   endif
+#   ifdef BZ_EXPORT
+#   define BZ_API(func) WINAPI func
+#   define BZ_EXTERN extern
+#   else
+   /* import windows dll dynamically */
+#   define BZ_API(func) (WINAPI * func)
+#   define BZ_EXTERN
+#   endif
+#else
+#   define BZ_API(func) func
+#   define BZ_EXTERN extern
+#endif
+
+
+/*-- Core (low-level) library functions --*/
+
+BZ_EXTERN int BZ_API(BZ2_bzCompressInit) ( 
+      bz_stream* strm, 
+      int        blockSize100k, 
+      int        verbosity, 
+      int        workFactor 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzCompress) ( 
+      bz_stream* strm, 
+      int action 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) ( 
+      bz_stream* strm 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) ( 
+      bz_stream *strm, 
+      int       verbosity, 
+      int       small
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzDecompress) ( 
+      bz_stream* strm 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) ( 
+      bz_stream *strm 
+   );
+
+
+
+/*-- High(er) level library functions --*/
+
+#ifndef BZ_NO_STDIO
+#define BZ_MAX_UNUSED 5000
+
+typedef void BZFILE;
+
+BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) ( 
+      int*  bzerror,   
+      FILE* f, 
+      int   verbosity, 
+      int   small,
+      void* unused,    
+      int   nUnused 
+   );
+
+BZ_EXTERN void BZ_API(BZ2_bzReadClose) ( 
+      int*    bzerror, 
+      BZFILE* b 
+   );
+
+BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) ( 
+      int*    bzerror, 
+      BZFILE* b, 
+      void**  unused,  
+      int*    nUnused 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzRead) ( 
+      int*    bzerror, 
+      BZFILE* b, 
+      void*   buf, 
+      int     len 
+   );
+
+BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) ( 
+      int*  bzerror,      
+      FILE* f, 
+      int   blockSize100k, 
+      int   verbosity, 
+      int   workFactor 
+   );
+
+BZ_EXTERN void BZ_API(BZ2_bzWrite) ( 
+      int*    bzerror, 
+      BZFILE* b, 
+      void*   buf, 
+      int     len 
+   );
+
+BZ_EXTERN void BZ_API(BZ2_bzWriteClose) ( 
+      int*          bzerror, 
+      BZFILE*       b, 
+      int           abandon, 
+      unsigned int* nbytes_in, 
+      unsigned int* nbytes_out 
+   );
+
+BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) ( 
+      int*          bzerror, 
+      BZFILE*       b, 
+      int           abandon, 
+      unsigned int* nbytes_in_lo32, 
+      unsigned int* nbytes_in_hi32, 
+      unsigned int* nbytes_out_lo32, 
+      unsigned int* nbytes_out_hi32
+   );
+#endif
+
+
+/*-- Utility functions --*/
+
+BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) ( 
+      char*         dest, 
+      unsigned int* destLen,
+      char*         source, 
+      unsigned int  sourceLen,
+      int           blockSize100k, 
+      int           verbosity, 
+      int           workFactor 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) ( 
+      char*         dest, 
+      unsigned int* destLen,
+      char*         source, 
+      unsigned int  sourceLen,
+      int           small, 
+      int           verbosity 
+   );
+
+
+/*--
+   Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp)
+   to support better zlib compatibility.
+   This code is not _officially_ part of libbzip2 (yet);
+   I haven't tested it, documented it, or considered the
+   threading-safeness of it.
+   If this code breaks, please contact both Yoshioka and me.
+--*/
+
+BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
+      void
+   );
+
+#ifndef BZ_NO_STDIO
+BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
+      const char *path,
+      const char *mode
+   );
+
+BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
+      int        fd,
+      const char *mode
+   );
+         
+BZ_EXTERN int BZ_API(BZ2_bzread) (
+      BZFILE* b, 
+      void* buf, 
+      int len 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzwrite) (
+      BZFILE* b, 
+      void*   buf, 
+      int     len 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzflush) (
+      BZFILE* b
+   );
+
+BZ_EXTERN void BZ_API(BZ2_bzclose) (
+      BZFILE* b
+   );
+
+BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
+      BZFILE *b, 
+      int    *errnum
+   );
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+/*-------------------------------------------------------------*/
+/*--- end                                           bzlib.h ---*/
+/*-------------------------------------------------------------*/
diff --git a/libraries/bzip2/bzlib_private.h b/libraries/bzip2/bzlib_private.h
new file mode 100644
index 000000000..3755a6f70
--- /dev/null
+++ b/libraries/bzip2/bzlib_private.h
@@ -0,0 +1,509 @@
+
+/*-------------------------------------------------------------*/
+/*--- Private header file for the library.                  ---*/
+/*---                                       bzlib_private.h ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+   This file is part of bzip2/libbzip2, a program and library for
+   lossless, block-sorting data compression.
+
+   bzip2/libbzip2 version 1.0.8 of 13 July 2019
+   Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
+
+   Please read the WARNING, DISCLAIMER and PATENTS sections in the 
+   README file.
+
+   This program is released under the terms of the license contained
+   in the file LICENSE.
+   ------------------------------------------------------------------ */
+
+
+#ifndef _BZLIB_PRIVATE_H
+#define _BZLIB_PRIVATE_H
+
+#include <stdlib.h>
+
+#ifndef BZ_NO_STDIO
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#endif
+
+#include "bzlib.h"
+
+
+
+/*-- General stuff. --*/
+
+#define BZ_VERSION  "1.0.8, 13-Jul-2019"
+
+typedef char            Char;
+typedef unsigned char   Bool;
+typedef unsigned char   UChar;
+typedef int             Int32;
+typedef unsigned int    UInt32;
+typedef short           Int16;
+typedef unsigned short  UInt16;
+
+#define True  ((Bool)1)
+#define False ((Bool)0)
+
+#ifndef __GNUC__
+#define __inline__  /* */
+#endif 
+
+#ifndef BZ_NO_STDIO
+
+extern void BZ2_bz__AssertH__fail ( int errcode );
+#define AssertH(cond,errcode) \
+   { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); }
+
+#if BZ_DEBUG
+#define AssertD(cond,msg) \
+   { if (!(cond)) {       \
+      fprintf ( stderr,   \
+        "\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\
+      exit(1); \
+   }}
+#else
+#define AssertD(cond,msg) /* */
+#endif
+
+#define VPrintf0(zf) \
+   fprintf(stderr,zf)
+#define VPrintf1(zf,za1) \
+   fprintf(stderr,zf,za1)
+#define VPrintf2(zf,za1,za2) \
+   fprintf(stderr,zf,za1,za2)
+#define VPrintf3(zf,za1,za2,za3) \
+   fprintf(stderr,zf,za1,za2,za3)
+#define VPrintf4(zf,za1,za2,za3,za4) \
+   fprintf(stderr,zf,za1,za2,za3,za4)
+#define VPrintf5(zf,za1,za2,za3,za4,za5) \
+   fprintf(stderr,zf,za1,za2,za3,za4,za5)
+
+#else
+
+extern void bz_internal_error ( int errcode );
+#define AssertH(cond,errcode) \
+   { if (!(cond)) bz_internal_error ( errcode ); }
+#define AssertD(cond,msg)                do { } while (0)
+#define VPrintf0(zf)                     do { } while (0)
+#define VPrintf1(zf,za1)                 do { } while (0)
+#define VPrintf2(zf,za1,za2)             do { } while (0)
+#define VPrintf3(zf,za1,za2,za3)         do { } while (0)
+#define VPrintf4(zf,za1,za2,za3,za4)     do { } while (0)
+#define VPrintf5(zf,za1,za2,za3,za4,za5) do { } while (0)
+
+#endif
+
+
+#define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1)
+#define BZFREE(ppp)  (strm->bzfree)(strm->opaque,(ppp))
+
+
+/*-- Header bytes. --*/
+
+#define BZ_HDR_B 0x42   /* 'B' */
+#define BZ_HDR_Z 0x5a   /* 'Z' */
+#define BZ_HDR_h 0x68   /* 'h' */
+#define BZ_HDR_0 0x30   /* '0' */
+  
+/*-- Constants for the back end. --*/
+
+#define BZ_MAX_ALPHA_SIZE 258
+#define BZ_MAX_CODE_LEN    23
+
+#define BZ_RUNA 0
+#define BZ_RUNB 1
+
+#define BZ_N_GROUPS 6
+#define BZ_G_SIZE   50
+#define BZ_N_ITERS  4
+
+#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE))
+
+
+
+/*-- Stuff for randomising repetitive blocks. --*/
+
+extern Int32 BZ2_rNums[512];
+
+#define BZ_RAND_DECLS                          \
+   Int32 rNToGo;                               \
+   Int32 rTPos                                 \
+
+#define BZ_RAND_INIT_MASK                      \
+   s->rNToGo = 0;                              \
+   s->rTPos  = 0                               \
+
+#define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0)
+
+#define BZ_RAND_UPD_MASK                       \
+   if (s->rNToGo == 0) {                       \
+      s->rNToGo = BZ2_rNums[s->rTPos];         \
+      s->rTPos++;                              \
+      if (s->rTPos == 512) s->rTPos = 0;       \
+   }                                           \
+   s->rNToGo--;
+
+
+
+/*-- Stuff for doing CRCs. --*/
+
+extern UInt32 BZ2_crc32Table[256];
+
+#define BZ_INITIALISE_CRC(crcVar)              \
+{                                              \
+   crcVar = 0xffffffffL;                       \
+}
+
+#define BZ_FINALISE_CRC(crcVar)                \
+{                                              \
+   crcVar = ~(crcVar);                         \
+}
+
+#define BZ_UPDATE_CRC(crcVar,cha)              \
+{  /* fold byte cha into crcVar */             \
+   (crcVar) = ((crcVar) << 8) ^                \
+            BZ2_crc32Table[((crcVar) >> 24) ^  \
+                           ((UChar)(cha))];    \
+}
+
+
+
+/*-- States and modes for compression. --*/
+
+#define BZ_M_IDLE      1
+#define BZ_M_RUNNING   2
+#define BZ_M_FLUSHING  3
+#define BZ_M_FINISHING 4
+
+#define BZ_S_OUTPUT    1
+#define BZ_S_INPUT     2
+
+#define BZ_N_RADIX 2
+#define BZ_N_QSORT 12
+#define BZ_N_SHELL 18
+#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2)
+
+
+
+
+/*-- Structure holding all the compression-side stuff. --*/
+
+typedef
+   struct {
+      /* pointer back to the struct bz_stream */
+      bz_stream* strm;
+
+      /* mode this stream is in, and whether inputting */
+      /* or outputting data */
+      Int32    mode;     /* presumably one of BZ_M_* -- confirm */
+      Int32    state;    /* presumably BZ_S_INPUT/BZ_S_OUTPUT -- confirm */
+
+      /* remembers avail_in when flush/finish requested */
+      UInt32   avail_in_expect;
+
+      /* for doing the block sorting */
+      UInt32*  arr1;
+      UInt32*  arr2;
+      UInt32*  ftab;
+      Int32    origPtr;
+
+      /* aliases for arr1 and arr2 */
+      UInt32*  ptr;
+      UChar*   block;
+      UInt16*  mtfv;
+      UChar*   zbits;    /* compress.c stores output bytes at zbits[numZ] */
+
+      /* for deciding when to use the fallback sorting algorithm */
+      Int32    workFactor;
+
+      /* run-length-encoding of the input */
+      UInt32   state_in_ch;
+      Int32    state_in_len;
+      BZ_RAND_DECLS;     /* expands to: Int32 rNToGo; Int32 rTPos */
+
+      /* input and output limits and current posns */
+      Int32    nblock;
+      Int32    nblockMAX;
+      Int32    numZ;     /* incremented after each byte put in zbits */
+      Int32    state_out_pos;
+
+      /* map of bytes used in block */
+      Int32    nInUse;
+      Bool     inUse[256];
+      UChar    unseqToSeq[256];
+
+      /* the buffer for bit stream creation */
+      UInt32   bsBuff;   /* top byte (bsBuff >> 24) is emitted first */
+      Int32    bsLive;   /* drops by 8 for every byte emitted */
+
+      /* block and combined CRCs */
+      UInt32   blockCRC;
+      UInt32   combinedCRC;
+
+      /* misc administratium */
+      Int32    verbosity;
+      Int32    blockNo;
+      Int32    blockSize100k;
+
+      /* stuff for coding the MTF values */
+      Int32    nMTF;
+      Int32    mtfFreq    [BZ_MAX_ALPHA_SIZE];
+      UChar    selector   [BZ_MAX_SELECTORS];
+      UChar    selectorMtf[BZ_MAX_SELECTORS];
+
+      UChar    len     [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+      Int32    code    [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+      Int32    rfreq   [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+      /* second dimension: only 3 needed; 4 makes index calculations faster */
+      UInt32   len_pack[BZ_MAX_ALPHA_SIZE][4];
+
+   }
+   EState;
+
+
+
+/*-- externs for compression. --*/
+
+extern void 
+BZ2_blockSort ( EState* );
+
+extern void 
+BZ2_compressBlock ( EState*, Bool );
+
+extern void 
+BZ2_bsInitWrite ( EState* );
+
+extern void 
+BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 );
+
+extern void 
+BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 );
+
+
+
+/*-- states for decompression. --*/
+
+#define BZ_X_IDLE        1
+#define BZ_X_OUTPUT      2
+
+#define BZ_X_MAGIC_1     10
+#define BZ_X_MAGIC_2     11
+#define BZ_X_MAGIC_3     12
+#define BZ_X_MAGIC_4     13
+#define BZ_X_BLKHDR_1    14
+#define BZ_X_BLKHDR_2    15
+#define BZ_X_BLKHDR_3    16
+#define BZ_X_BLKHDR_4    17
+#define BZ_X_BLKHDR_5    18
+#define BZ_X_BLKHDR_6    19
+#define BZ_X_BCRC_1      20
+#define BZ_X_BCRC_2      21
+#define BZ_X_BCRC_3      22
+#define BZ_X_BCRC_4      23
+#define BZ_X_RANDBIT     24
+#define BZ_X_ORIGPTR_1   25
+#define BZ_X_ORIGPTR_2   26
+#define BZ_X_ORIGPTR_3   27
+#define BZ_X_MAPPING_1   28
+#define BZ_X_MAPPING_2   29
+#define BZ_X_SELECTOR_1  30
+#define BZ_X_SELECTOR_2  31
+#define BZ_X_SELECTOR_3  32
+#define BZ_X_CODING_1    33
+#define BZ_X_CODING_2    34
+#define BZ_X_CODING_3    35
+#define BZ_X_MTF_1       36
+#define BZ_X_MTF_2       37
+#define BZ_X_MTF_3       38
+#define BZ_X_MTF_4       39
+#define BZ_X_MTF_5       40
+#define BZ_X_MTF_6       41
+#define BZ_X_ENDHDR_2    42
+#define BZ_X_ENDHDR_3    43
+#define BZ_X_ENDHDR_4    44
+#define BZ_X_ENDHDR_5    45
+#define BZ_X_ENDHDR_6    46
+#define BZ_X_CCRC_1      47
+#define BZ_X_CCRC_2      48
+#define BZ_X_CCRC_3      49
+#define BZ_X_CCRC_4      50
+
+
+
+/*-- Constants for the fast MTF decoder. --*/
+
+#define MTFA_SIZE 4096
+#define MTFL_SIZE 16
+
+
+
+/*-- Structure holding all the decompression-side stuff. --*/
+
+typedef
+   struct {
+      /* pointer back to the struct bz_stream */
+      bz_stream* strm;
+
+      /* state indicator for this stream */
+      Int32    state;    /* presumably one of the BZ_X_* values -- confirm */
+
+      /* for doing the final run-length decoding */
+      UChar    state_out_ch;
+      Int32    state_out_len;
+      Bool     blockRandomised;
+      BZ_RAND_DECLS;     /* expands to: Int32 rNToGo; Int32 rTPos */
+
+      /* the buffer for bit stream reading */
+      UInt32   bsBuff;
+      Int32    bsLive;
+
+      /* misc administratium */
+      Int32    blockSize100k;
+      Bool     smallDecompress;
+      Int32    currBlockNo;
+      Int32    verbosity;
+
+      /* for undoing the Burrows-Wheeler transform */
+      Int32    origPtr;
+      UInt32   tPos;     /* range-checked by BZ_GET_FAST / BZ_GET_SMALL */
+      Int32    k0;
+      Int32    unzftab[256];
+      Int32    nblock_used;
+      Int32    cftab[257];
+      Int32    cftabCopy[257];
+
+      /* for undoing the Burrows-Wheeler transform (FAST) */
+      UInt32   *tt;      /* entry: low 8 bits = byte, rest = next tPos */
+
+      /* for undoing the Burrows-Wheeler transform (SMALL) */
+      UInt16   *ll16;    /* low 16 bits of each entry (SET_LL/GET_LL) */
+      UChar    *ll4;     /* higher bits, two nibbles packed per byte */
+
+      /* stored and calculated CRCs */
+      UInt32   storedBlockCRC;
+      UInt32   storedCombinedCRC;
+      UInt32   calculatedBlockCRC;
+      UInt32   calculatedCombinedCRC;
+
+      /* map of bytes used in block */
+      Int32    nInUse;
+      Bool     inUse[256];
+      Bool     inUse16[16];
+      UChar    seqToUnseq[256];
+
+      /* for decoding the MTF values */
+      UChar    mtfa   [MTFA_SIZE];
+      Int32    mtfbase[256 / MTFL_SIZE];
+      UChar    selector   [BZ_MAX_SELECTORS];
+      UChar    selectorMtf[BZ_MAX_SELECTORS];
+      UChar    len  [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+
+      Int32    limit  [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+      Int32    base   [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+      Int32    perm   [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+      Int32    minLens[BZ_N_GROUPS];
+
+      /* save area for scalars in the main decompress code */
+      Int32    save_i;
+      Int32    save_j;
+      Int32    save_t;
+      Int32    save_alphaSize;
+      Int32    save_nGroups;
+      Int32    save_nSelectors;
+      Int32    save_EOB;
+      Int32    save_groupNo;
+      Int32    save_groupPos;
+      Int32    save_nextSym;
+      Int32    save_nblockMAX;
+      Int32    save_nblock;
+      Int32    save_es;
+      Int32    save_N;
+      Int32    save_curr;
+      Int32    save_zt;
+      Int32    save_zn; 
+      Int32    save_zvec;
+      Int32    save_zj;
+      Int32    save_gSel;
+      Int32    save_gMinlen;
+      Int32*   save_gLimit;
+      Int32*   save_gBase;
+      Int32*   save_gPerm;
+
+   }
+   DState;
+
+
+
+/*-- Macros for decompression. --*/
+
+#define BZ_GET_FAST(cccc)                     \
+    /* c_tPos is unsigned, hence test < 0 is pointless. */ \
+    if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \
+    s->tPos = s->tt[s->tPos];                 \
+    cccc = (UChar)(s->tPos & 0xff);           \
+    s->tPos >>= 8;
+
+#define BZ_GET_FAST_C(cccc)                   \
+    /* c_tPos is unsigned, hence test < 0 is pointless. */ \
+    if (c_tPos >= (UInt32)100000 * (UInt32)ro_blockSize100k) return True; \
+    c_tPos = c_tt[c_tPos];                    \
+    cccc = (UChar)(c_tPos & 0xff);            \
+    c_tPos >>= 8;
+
+#define SET_LL4(i,n)                                          \
+   { if (((i) & 0x1) == 0)                                    \
+        s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else    \
+        s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4);  \
+   }
+
+#define GET_LL4(i)                             \
+   ((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF)
+
+#define SET_LL(i,n)   /* low 16 bits -> ll16, (n) >> 16 -> SET_LL4 */ \
+   { s->ll16[i] = (UInt16)((n) & 0x0000ffff);  \
+     SET_LL4(i, (n) >> 16);                    \
+   }
+
+#define GET_LL(i) \
+   (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16))
+
+#define BZ_GET_SMALL(cccc)                            \
+    /* c_tPos is unsigned, hence test < 0 is pointless. */ \
+    if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \
+    cccc = BZ2_indexIntoF ( s->tPos, s->cftab );    \
+    s->tPos = GET_LL(s->tPos);
+
+
+/*-- externs for decompression. --*/
+
+extern Int32 
+BZ2_indexIntoF ( Int32, Int32* );
+
+extern Int32 
+BZ2_decompress ( DState* );
+
+extern void 
+BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
+                           Int32,  Int32, Int32 );
+
+
+#endif
+
+
+/*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. --*/
+
+#ifdef BZ_NO_STDIO
+#ifndef NULL
+#define NULL 0
+#endif
+#endif
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                   bzlib_private.h ---*/
+/*-------------------------------------------------------------*/
diff --git a/libraries/bzip2/compress.c b/libraries/bzip2/compress.c
new file mode 100644
index 000000000..5dfa00231
--- /dev/null
+++ b/libraries/bzip2/compress.c
@@ -0,0 +1,672 @@
+
+/*-------------------------------------------------------------*/
+/*--- Compression machinery (not incl block sorting)        ---*/
+/*---                                            compress.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+   This file is part of bzip2/libbzip2, a program and library for
+   lossless, block-sorting data compression.
+
+   bzip2/libbzip2 version 1.0.8 of 13 July 2019
+   Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
+
+   Please read the WARNING, DISCLAIMER and PATENTS sections in the 
+   README file.
+
+   This program is released under the terms of the license contained
+   in the file LICENSE.
+   ------------------------------------------------------------------ */
+
+
+/* CHANGES
+    0.9.0    -- original version.
+    0.9.0a/b -- no changes in this file.
+    0.9.0c   -- changed setting of nGroups in sendMTFValues() 
+                so as to do a bit better on small files
+*/
+
+#include "bzlib_private.h"
+
+
+/*---------------------------------------------------*/
+/*--- Bit stream I/O                              ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+void BZ2_bsInitWrite ( EState* s )
+{
+   /*-- Empty bit accumulator: nothing buffered, no live bits. --*/
+   s->bsBuff = 0;  s->bsLive = 0;
+}
+
+
+/*---------------------------------------------------*/
+/*-- Flush whatever bits remain in bsBuff to zbits, one byte at a
+     time from the top; a final partial byte is written as is. --*/
+static void bsFinishWrite ( EState* s )
+{
+   for (; s->bsLive > 0; s->bsLive -= 8) {
+      /* top 8 bits of the accumulator become the next output byte */
+      s->zbits[s->numZ++] = (UChar)(s->bsBuff >> 24);
+      s->bsBuff <<= 8;
+   }
+}
+
+
+/*---------------------------------------------------*/
+#define bsNEEDW(nz)   /* nz is not used by the expansion */ \
+{                     /* move every complete byte from bsBuff to zbits */ \
+   while (s->bsLive >= 8) {                   \
+      s->zbits[s->numZ]                       \
+         = (UChar)(s->bsBuff >> 24);          \
+      s->numZ++;                              \
+      s->bsBuff <<= 8;                        \
+      s->bsLive -= 8;                         \
+   }                                          \
+}
+
+
+/*---------------------------------------------------*/
+/*-- Append v, as an n-bit field, directly below the bits held in bsBuff. --*/
+static __inline__ void bsW ( EState* s, Int32 n, UInt32 v )
+{
+   Int32 shift;
+   bsNEEDW ( n );   /* flush whole bytes first, leaving bsLive < 8 */
+   shift = 32 - s->bsLive - n;
+   s->bsBuff |= (v << shift);  s->bsLive += n;
+}
+
+
+/*---------------------------------------------------*/
+/*-- Emit u as four 8-bit fields, most significant byte first. --*/
+static void bsPutUInt32 ( EState* s, UInt32 u )
+{
+   Int32 shift;
+   for (shift = 24; shift >= 0; shift -= 8) {
+      bsW ( s, 8, (u >> shift) & 0xffL );
+   }
+}
+
+
+/*---------------------------------------------------*/
+/*-- Emit the single byte c as one 8-bit field. --*/
+static void bsPutUChar ( EState* s, UChar c )
+{
+   bsW ( s, 8, (UInt32)c );
+}
+
+
+/*---------------------------------------------------*/
+/*--- The back end proper                         ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+/*-- Number the byte values flagged in inUse[] consecutively from 0:
+     unseqToSeq[] gets each one's rank, nInUse the total count. --*/
+static void makeMaps_e ( EState* s )
+{
+   Int32 byteVal, nSeen = 0;
+   for (byteVal = 0; byteVal < 256; byteVal++) {
+      if (!s->inUse[byteVal]) continue;
+      s->unseqToSeq[byteVal] = nSeen++;
+   }
+   s->nInUse = nSeen;
+}
+
+
+/*---------------------------------------------------*/
+static
+void generateMTFValues ( EState* s )
+{
+   UChar   yy[256];   /* move-to-front list of sequence numbers */
+   Int32   i, j;
+   Int32   zPend;     /* length of the pending run of yy[0] hits */
+   Int32   wr;        /* next free slot in mtfv[] */
+   Int32   EOB;       /* end-of-block symbol, s->nInUse+1 */
+   /*-- Fills s->mtfv[] and s->mtfFreq[], and sets s->nMTF. --*/
+   /* 
+      After sorting (eg, here),
+         s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
+         and
+         ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] 
+         holds the original block data.
+
+      The first thing to do is generate the MTF values,
+      and put them in
+         ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
+      Because there are strictly fewer or equal MTF values
+      than block values, ptr values in this area are overwritten
+      with MTF values only when they are no longer needed.
+
+      The final compressed bitstream is generated into the
+      area starting at
+         (UChar*) (&((UChar*)s->arr2)[s->nblock])
+
+      These storage aliases are set up in bzCompressInit(),
+      except for the last one, which is arranged in 
+      compressBlock().
+   */
+   UInt32* ptr   = s->ptr;
+   UChar* block  = s->block;
+   UInt16* mtfv  = s->mtfv;
+
+   makeMaps_e ( s );
+   EOB = s->nInUse+1;
+
+   for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
+
+   wr = 0;
+   zPend = 0;
+   for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
+   /*-- One pass over the block in sorted order. --*/
+   for (i = 0; i < s->nblock; i++) {
+      UChar ll_i;
+      AssertD ( wr <= i, "generateMTFValues(1)" );
+      j = ptr[i]-1; if (j < 0) j += s->nblock;   /* byte just before ptr[i], wrapping */
+      ll_i = s->unseqToSeq[block[j]];
+      AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
+
+      if (yy[0] == ll_i) { 
+         zPend++;   /* symbol already at the front: extend the run */
+      } else {
+         /*-- First flush any pending run as RUNA/RUNB symbols. --*/
+         if (zPend > 0) {
+            zPend--;
+            while (True) {
+               if (zPend & 1) {
+                  mtfv[wr] = BZ_RUNB; wr++; 
+                  s->mtfFreq[BZ_RUNB]++; 
+               } else {
+                  mtfv[wr] = BZ_RUNA; wr++; 
+                  s->mtfFreq[BZ_RUNA]++; 
+               }
+               if (zPend < 2) break;
+               zPend = (zPend - 2) / 2;
+            };
+            zPend = 0;
+         }
+         {
+            /* Search yy[] for ll_i, sliding each passed entry down one
+               place; then put ll_i at the front. */
+            register UChar  rtmp;
+            register UChar* ryy_j;
+            register UChar  rll_i;
+            rtmp  = yy[1];
+            yy[1] = yy[0];
+            ryy_j = &(yy[1]);
+            rll_i = ll_i;
+            while ( rll_i != rtmp ) {
+               register UChar rtmp2;
+               ryy_j++;
+               rtmp2  = rtmp;
+               rtmp   = *ryy_j;
+               *ryy_j = rtmp2;
+            };
+            yy[0] = rtmp;
+            j = ryy_j - &(yy[0]);   /* old position of ll_i in yy[] */
+            mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
+         }
+
+      }
+   }
+   /*-- Flush a run still pending at the end of the block. --*/
+   if (zPend > 0) {
+      zPend--;
+      while (True) {
+         if (zPend & 1) {
+            mtfv[wr] = BZ_RUNB; wr++; 
+            s->mtfFreq[BZ_RUNB]++; 
+         } else {
+            mtfv[wr] = BZ_RUNA; wr++; 
+            s->mtfFreq[BZ_RUNA]++; 
+         }
+         if (zPend < 2) break;
+         zPend = (zPend - 2) / 2;
+      };
+      zPend = 0;
+   }
+
+   mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
+
+   s->nMTF = wr;
+}
+
+
+/*---------------------------------------------------*/
+#define BZ_LESSER_ICOST  0    /* initial cost for symbols inside a table's range */
+#define BZ_GREATER_ICOST 15   /* initial cost for symbols outside it */
+
+static
+void sendMTFValues ( EState* s )
+{
+   Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
+   Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
+   Int32 nGroups, nBytes;
+   /*-- Builds the coding tables for s->mtfv[0 .. s->nMTF-1] and writes tables and coded symbols via bsW. --*/
+   /*--
+   UChar  len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+   is a global since the decoder also needs it.
+
+   Int32  code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+   Int32  rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+   are also globals only used in this proc.
+   Made global to keep stack frame size small.
+   --*/
+   /*-- (In this code they are reached as s->len, s->code and s->rfreq.) --*/
+
+   UInt16 cost[BZ_N_GROUPS];   /* cost of the current group under each table */
+   Int32  fave[BZ_N_GROUPS];   /* number of groups that chose each table this pass */
+
+   UInt16* mtfv = s->mtfv;
+
+   if (s->verbosity >= 3)
+      VPrintf3( "      %d in block, %d after MTF & 1-2 coding, "
+                "%d+2 syms in use\n", 
+                s->nblock, s->nMTF, s->nInUse );
+
+   alphaSize = s->nInUse+2;
+   for (t = 0; t < BZ_N_GROUPS; t++)
+      for (v = 0; v < alphaSize; v++)
+         s->len[t][v] = BZ_GREATER_ICOST;
+
+   /*--- Decide how many coding tables to use ---*/
+   AssertH ( s->nMTF > 0, 3001 );
+   if (s->nMTF < 200)  nGroups = 2; else
+   if (s->nMTF < 600)  nGroups = 3; else
+   if (s->nMTF < 1200) nGroups = 4; else
+   if (s->nMTF < 2400) nGroups = 5; else
+                       nGroups = 6;
+
+   /*--- Generate an initial set of coding tables ---*/
+   { 
+      Int32 nPart, remF, tFreq, aFreq;
+
+      nPart = nGroups;
+      remF  = s->nMTF;
+      gs = 0;
+      while (nPart > 0) {
+         tFreq = remF / nPart;   /* target share of the remaining frequency */
+         ge = gs-1;
+         aFreq = 0;
+         while (aFreq < tFreq && ge < alphaSize-1) {
+            ge++;
+            aFreq += s->mtfFreq[ge];
+         }
+
+         if (ge > gs 
+             && nPart != nGroups && nPart != 1 
+             && ((nGroups-nPart) % 2 == 1)) {
+            aFreq -= s->mtfFreq[ge];
+            ge--;
+         }
+
+         if (s->verbosity >= 3)
+            VPrintf5( "      initial group %d, [%d .. %d], "
+                      "has %d syms (%4.1f%%)\n",
+                      nPart, gs, ge, aFreq, 
+                      (100.0 * (float)aFreq) / (float)(s->nMTF) );
+ 
+         for (v = 0; v < alphaSize; v++)
+            if (v >= gs && v <= ge) 
+               s->len[nPart-1][v] = BZ_LESSER_ICOST; else
+               s->len[nPart-1][v] = BZ_GREATER_ICOST;
+ 
+         nPart--;
+         gs = ge+1;
+         remF -= aFreq;
+      }
+   }
+
+   /*--- 
+      Iterate up to BZ_N_ITERS times to improve the tables.
+   ---*/
+   for (iter = 0; iter < BZ_N_ITERS; iter++) {
+
+      for (t = 0; t < nGroups; t++) fave[t] = 0;
+
+      for (t = 0; t < nGroups; t++)
+         for (v = 0; v < alphaSize; v++)
+            s->rfreq[t][v] = 0;
+
+      /*---
+        Set up an auxiliary length table which is used to fast-track
+	the common case (nGroups == 6). 
+      ---*/
+      if (nGroups == 6) {
+         for (v = 0; v < alphaSize; v++) {
+            s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
+            s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
+            s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
+	 }
+      }
+
+      nSelectors = 0;
+      totc = 0;
+      gs = 0;
+      while (True) {
+
+         /*--- Set group start & end marks. --*/
+         if (gs >= s->nMTF) break;
+         ge = gs + BZ_G_SIZE - 1; 
+         if (ge >= s->nMTF) ge = s->nMTF-1;
+
+         /*-- 
+            Calculate the cost of this group as coded
+            by each of the coding tables.
+         --*/
+         for (t = 0; t < nGroups; t++) cost[t] = 0;
+
+         if (nGroups == 6 && 50 == ge-gs+1) {   /* 50: presumably BZ_G_SIZE -- confirm */
+            /*--- fast track the common case ---*/
+            register UInt32 cost01, cost23, cost45;
+            register UInt16 icv;
+            cost01 = cost23 = cost45 = 0;
+
+#           define BZ_ITER(nn)                \
+               icv = mtfv[gs+(nn)];           \
+               cost01 += s->len_pack[icv][0]; \
+               cost23 += s->len_pack[icv][1]; \
+               cost45 += s->len_pack[icv][2]; \
+
+            BZ_ITER(0);  BZ_ITER(1);  BZ_ITER(2);  BZ_ITER(3);  BZ_ITER(4);
+            BZ_ITER(5);  BZ_ITER(6);  BZ_ITER(7);  BZ_ITER(8);  BZ_ITER(9);
+            BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
+            BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
+            BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
+            BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
+            BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
+            BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
+            BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
+            BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
+
+#           undef BZ_ITER
+
+            cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
+            cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
+            cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
+
+         } else {
+	    /*--- slow version which correctly handles all situations ---*/
+            for (i = gs; i <= ge; i++) { 
+               UInt16 icv = mtfv[i];
+               for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
+            }
+         }
+ 
+         /*-- 
+            Find the coding table which is best for this group,
+            and record its identity in the selector table.
+         --*/
+         bc = 999999999; bt = -1;
+         for (t = 0; t < nGroups; t++)
+            if (cost[t] < bc) { bc = cost[t]; bt = t; };
+         totc += bc;
+         fave[bt]++;
+         s->selector[nSelectors] = bt;
+         nSelectors++;
+
+         /*-- 
+            Increment the symbol frequencies for the selected table.
+          --*/
+         if (nGroups == 6 && 50 == ge-gs+1) {
+            /*--- fast track the common case ---*/
+
+#           define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
+
+            BZ_ITUR(0);  BZ_ITUR(1);  BZ_ITUR(2);  BZ_ITUR(3);  BZ_ITUR(4);
+            BZ_ITUR(5);  BZ_ITUR(6);  BZ_ITUR(7);  BZ_ITUR(8);  BZ_ITUR(9);
+            BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
+            BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
+            BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
+            BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
+            BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
+            BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
+            BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
+            BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
+
+#           undef BZ_ITUR
+
+         } else {
+	    /*--- slow version which correctly handles all situations ---*/
+            for (i = gs; i <= ge; i++)
+               s->rfreq[bt][ mtfv[i] ]++;
+         }
+
+         gs = ge+1;
+      }
+      if (s->verbosity >= 3) {
+         VPrintf2 ( "      pass %d: size is %d, grp uses are ", 
+                   iter+1, totc/8 );
+         for (t = 0; t < nGroups; t++)
+            VPrintf1 ( "%d ", fave[t] );
+         VPrintf0 ( "\n" );
+      }
+
+      /*--
+        Recompute the tables based on the accumulated frequencies.
+      --*/
+      /* maxLen was changed from 20 to 17 in bzip2-1.0.3.  See 
+         comment in huffman.c for details. */
+      for (t = 0; t < nGroups; t++)
+         BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), 
+                                 alphaSize, 17 /*20*/ );
+   }
+
+   /*-- These limits match the 3-bit and 15-bit fields written below. --*/
+   AssertH( nGroups < 8, 3002 );
+   AssertH( nSelectors < 32768 &&
+            nSelectors <= BZ_MAX_SELECTORS,
+            3003 );
+
+
+   /*--- Compute MTF values for the selectors. ---*/
+   {
+      UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
+      for (i = 0; i < nGroups; i++) pos[i] = i;
+      for (i = 0; i < nSelectors; i++) {
+         ll_i = s->selector[i];
+         j = 0;
+         tmp = pos[j];
+         while ( ll_i != tmp ) {
+            j++;
+            tmp2 = tmp;
+            tmp = pos[j];
+            pos[j] = tmp2;
+         };
+         pos[0] = tmp;
+         s->selectorMtf[i] = j;   /* position the selector had in pos[] */
+      }
+   };
+
+   /*--- Assign actual codes for the tables. --*/
+   for (t = 0; t < nGroups; t++) {
+      minLen = 32;
+      maxLen = 0;
+      for (i = 0; i < alphaSize; i++) {
+         if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
+         if (s->len[t][i] < minLen) minLen = s->len[t][i];
+      }
+      AssertH ( !(maxLen > 17 /*20*/ ), 3004 );
+      AssertH ( !(minLen < 1),  3005 );
+      BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), 
+                          minLen, maxLen, alphaSize );
+   }
+
+   /*--- Transmit the mapping table. ---*/
+   { 
+      Bool inUse16[16];   /* inUse16[i]: any byte value in [16*i, 16*i+15] in use */
+      for (i = 0; i < 16; i++) {
+          inUse16[i] = False;
+          for (j = 0; j < 16; j++)
+             if (s->inUse[i * 16 + j]) inUse16[i] = True;
+      }
+     
+      nBytes = s->numZ;
+      for (i = 0; i < 16; i++)
+         if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
+
+      for (i = 0; i < 16; i++)
+         if (inUse16[i])
+            for (j = 0; j < 16; j++) {
+               if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
+            }
+
+      if (s->verbosity >= 3) 
+         VPrintf1( "      bytes: mapping %d, ", s->numZ-nBytes );
+   }
+
+   /*--- Now the selectors. ---*/
+   nBytes = s->numZ;
+   bsW ( s, 3, nGroups );
+   bsW ( s, 15, nSelectors );
+   for (i = 0; i < nSelectors; i++) { 
+      for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);   /* selectorMtf[i] one-bits ... */
+      bsW(s,1,0);                                           /* ... closed by a zero-bit */
+   }
+   if (s->verbosity >= 3)
+      VPrintf1( "selectors %d, ", s->numZ-nBytes );
+
+   /*--- Now the coding tables. ---*/
+   nBytes = s->numZ;
+   /*-- Per table: 5-bit first length, then per symbol steps up/down to its length and a 0 bit. --*/
+   for (t = 0; t < nGroups; t++) {
+      Int32 curr = s->len[t][0];
+      bsW ( s, 5, curr );
+      for (i = 0; i < alphaSize; i++) {
+         while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
+         while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
+         bsW ( s, 1, 0 );
+      }
+   }
+
+   if (s->verbosity >= 3)
+      VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
+
+   /*--- And finally, the block data proper ---*/
+   nBytes = s->numZ;
+   selCtr = 0;
+   gs = 0;
+   while (True) {
+      if (gs >= s->nMTF) break;
+      ge = gs + BZ_G_SIZE - 1; 
+      if (ge >= s->nMTF) ge = s->nMTF-1;
+      AssertH ( s->selector[selCtr] < nGroups, 3006 );
+
+      if (nGroups == 6 && 50 == ge-gs+1) {
+            /*--- fast track the common case ---*/
+            UInt16 mtfv_i;
+            UChar* s_len_sel_selCtr 
+               = &(s->len[s->selector[selCtr]][0]);
+            Int32* s_code_sel_selCtr
+               = &(s->code[s->selector[selCtr]][0]);
+
+#           define BZ_ITAH(nn)                      \
+               mtfv_i = mtfv[gs+(nn)];              \
+               bsW ( s,                             \
+                     s_len_sel_selCtr[mtfv_i],      \
+                     s_code_sel_selCtr[mtfv_i] )
+
+            BZ_ITAH(0);  BZ_ITAH(1);  BZ_ITAH(2);  BZ_ITAH(3);  BZ_ITAH(4);
+            BZ_ITAH(5);  BZ_ITAH(6);  BZ_ITAH(7);  BZ_ITAH(8);  BZ_ITAH(9);
+            BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
+            BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
+            BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
+            BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
+            BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
+            BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
+            BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
+            BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
+
+#           undef BZ_ITAH
+
+      } else {
+	 /*--- slow version which correctly handles all situations ---*/
+         for (i = gs; i <= ge; i++) {
+            bsW ( s, 
+                  s->len  [s->selector[selCtr]] [mtfv[i]],
+                  s->code [s->selector[selCtr]] [mtfv[i]] );
+         }
+      }
+
+
+      gs = ge+1;
+      selCtr++;
+   }
+   AssertH( selCtr == nSelectors, 3007 );
+
+   if (s->verbosity >= 3)
+      VPrintf1( "codes %d\n", s->numZ-nBytes );
+}
+
+
+/*---------------------------------------------------*/
+void BZ2_compressBlock ( EState* s, Bool is_last_block )
+{
+   if (s->nblock > 0) {
+      /*-- Finish the block CRC and fold it into the stream CRC (rotate left 1, then xor). --*/
+      BZ_FINALISE_CRC ( s->blockCRC );
+      s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
+      s->combinedCRC ^= s->blockCRC;
+      if (s->blockNo > 1) s->numZ = 0;   /* blocks after the first restart the output count */
+
+      if (s->verbosity >= 2)
+         VPrintf4( "    block %d: crc = 0x%08x, "
+                   "combined CRC = 0x%08x, size = %d\n",
+                   s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
+
+      BZ2_blockSort ( s );
+   }
+   /*-- Output bytes go into arr2 directly after the nblock bytes of block data. --*/
+   s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
+
+   /*-- If this is the first block, create the stream header. --*/
+   if (s->blockNo == 1) {
+      BZ2_bsInitWrite ( s );
+      bsPutUChar ( s, BZ_HDR_B );
+      bsPutUChar ( s, BZ_HDR_Z );
+      bsPutUChar ( s, BZ_HDR_h );
+      bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
+   }
+
+   if (s->nblock > 0) {
+      /*-- Six marker bytes that open every non-empty block. --*/
+      bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
+      bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
+      bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
+
+      /*-- Now the block's CRC, so it is in a known place. --*/
+      bsPutUInt32 ( s, s->blockCRC );
+
+      /*-- 
+         Now a single bit indicating (non-)randomisation. 
+         As of version 0.9.5, we use a better sorting algorithm
+         which makes randomisation unnecessary.  So always set
+         the randomised bit to 'no'.  Of course, the decoder
+         still needs to be able to handle randomised blocks
+         so as to maintain backwards compatibility with
+         older versions of bzip2.
+      --*/
+      bsW(s,1,0);
+
+      bsW ( s, 24, s->origPtr );   /* origPtr is sent as a 24-bit field */
+      generateMTFValues ( s );
+      sendMTFValues ( s );
+   }
+
+
+   /*-- If this is the last block, add the stream trailer. --*/
+   if (is_last_block) {
+      /*-- Six marker bytes, the combined CRC, then flush the leftover bits. --*/
+      bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
+      bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
+      bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
+      bsPutUInt32 ( s, s->combinedCRC );
+      if (s->verbosity >= 2)
+         VPrintf1( "    final combined CRC = 0x%08x\n   ", s->combinedCRC );
+      bsFinishWrite ( s );
+   }
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                        compress.c ---*/
+/*-------------------------------------------------------------*/
diff --git a/libraries/bzip2/crctable.c b/libraries/bzip2/crctable.c
new file mode 100644
index 000000000..2b33c2535
--- /dev/null
+++ b/libraries/bzip2/crctable.c
@@ -0,0 +1,104 @@
+
+/*-------------------------------------------------------------*/
+/*--- Table for doing CRCs                                  ---*/
+/*---                                            crctable.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+   This file is part of bzip2/libbzip2, a program and library for
+   lossless, block-sorting data compression.
+
+   bzip2/libbzip2 version 1.0.8 of 13 July 2019
+   Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
+
+   Please read the WARNING, DISCLAIMER and PATENTS sections in the 
+   README file.
+
+   This program is released under the terms of the license contained
+   in the file LICENSE.
+   ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+/*--
+  I think this is an implementation of the AUTODIN-II,
+  Ethernet & FDDI 32-bit CRC standard.  Vaguely derived
+  from code by Rob Warnock, in Section 51 of the
+  comp.compression FAQ.
+--*/
+
+UInt32 BZ2_crc32Table[256] = {
+
+   /*-- Ugly, innit? --*/
+   /*-- 256 entries; entry [1] is 0x04c11db7, presumably the generator polynomial of the CRC named above. --*/
+   0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
+   0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
+   0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
+   0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
+   0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
+   0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
+   0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
+   0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
+   0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
+   0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
+   0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
+   0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
+   0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
+   0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
+   0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
+   0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
+   0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
+   0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
+   0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
+   0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
+   0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
+   0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
+   0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
+   0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
+   0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
+   0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
+   0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
+   0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
+   0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
+   0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
+   0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
+   0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
+   0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
+   0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
+   0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
+   0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
+   0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
+   0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
+   0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
+   0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
+   0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
+   0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
+   0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
+   0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
+   0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
+   0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
+   0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
+   0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
+   0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
+   0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
+   0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
+   0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
+   0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
+   0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
+   0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
+   0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
+   0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
+   0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
+   0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
+   0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
+   0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
+   0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
+   0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
+   0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
+};
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                        crctable.c ---*/
+/*-------------------------------------------------------------*/
diff --git a/libraries/bzip2/decompress.c b/libraries/bzip2/decompress.c
new file mode 100644
index 000000000..a1a0bac89
--- /dev/null
+++ b/libraries/bzip2/decompress.c
@@ -0,0 +1,652 @@
+
+/*-------------------------------------------------------------*/
+/*--- Decompression machinery                               ---*/
+/*---                                          decompress.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+   This file is part of bzip2/libbzip2, a program and library for
+   lossless, block-sorting data compression.
+
+   bzip2/libbzip2 version 1.0.8 of 13 July 2019
+   Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
+
+   Please read the WARNING, DISCLAIMER and PATENTS sections in the 
+   README file.
+
+   This program is released under the terms of the license contained
+   in the file LICENSE.
+   ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+
+/*-- makeMaps_d: list the byte values flagged in s->inUse[] into s->seqToUnseq[]. --*/
+static
+void makeMaps_d ( DState* s )
+{
+   Int32 i;
+   s->nInUse = 0;                        /* ends up as the number of flagged values */
+   for (i = 0; i < 256; i++)
+      if (s->inUse[i]) {
+         s->seqToUnseq[s->nInUse] = i;   /* slot k holds the k-th flagged value, ascending */
+         s->nInUse++;
+      }
+}
+
+
+/*-- RETURN: store the result code in retVal and leave via save_state_and_return. --*/
+#define RETURN(rrr)                               \
+   { retVal = rrr; goto save_state_and_return; };
+/*-- GET_BITS: read nnn bits into vvv; RETURN(BZ_OK) if input runs out; resumes at 'case lll'. --*/
+#define GET_BITS(lll,vvv,nnn)                     \
+   case lll: s->state = lll;                      \
+   while (True) {                                 \
+      if (s->bsLive >= nnn) {                     \
+         UInt32 v;                                \
+         v = (s->bsBuff >>                        \
+             (s->bsLive-nnn)) & ((1 << nnn)-1);   \
+         s->bsLive -= nnn;                        \
+         vvv = v;                                 \
+         break;                                   \
+      }                                           \
+      if (s->strm->avail_in == 0) RETURN(BZ_OK);  \
+      s->bsBuff                                   \
+         = (s->bsBuff << 8) |                     \
+           ((UInt32)                              \
+              (*((UChar*)(s->strm->next_in))));   \
+      s->bsLive += 8;                             \
+      s->strm->next_in++;                         \
+      s->strm->avail_in--;                        \
+      s->strm->total_in_lo32++;                   \
+      if (s->strm->total_in_lo32 == 0)            \
+         s->strm->total_in_hi32++;                \
+   }
+/*-- GET_UCHAR: GET_BITS specialised to an 8-bit read. --*/
+#define GET_UCHAR(lll,uuu)                        \
+   GET_BITS(lll,uuu,8)
+/*-- GET_BIT: GET_BITS specialised to a 1-bit read. --*/
+#define GET_BIT(lll,uuu)                          \
+   GET_BITS(lll,uuu,1)
+
+/*-- GET_MTF_VAL: decode one Huffman-coded symbol into lval; label1/label2 are its two resume states. --*/
+#define GET_MTF_VAL(label1,label2,lval)           \
+{                                                 \
+   if (groupPos == 0) {                           \
+      groupNo++;                                  \
+      if (groupNo >= nSelectors)                  \
+         RETURN(BZ_DATA_ERROR);                   \
+      groupPos = BZ_G_SIZE;                       \
+      gSel = s->selector[groupNo];                \
+      gMinlen = s->minLens[gSel];                 \
+      gLimit = &(s->limit[gSel][0]);              \
+      gPerm = &(s->perm[gSel][0]);                \
+      gBase = &(s->base[gSel][0]);                \
+   }                                              \
+   groupPos--;                                    \
+   zn = gMinlen;                                  \
+   GET_BITS(label1, zvec, zn);                    \
+   while (1) {                                    \
+      if (zn > 20 /* the longest code */)         \
+         RETURN(BZ_DATA_ERROR);                   \
+      if (zvec <= gLimit[zn]) break;              \
+      zn++;                                       \
+      GET_BIT(label2, zj);                        \
+      zvec = (zvec << 1) | zj;                    \
+   };                                             \
+   if (zvec - gBase[zn] < 0                       \
+       || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE)  \
+      RETURN(BZ_DATA_ERROR);                      \
+   lval = gPerm[zvec - gBase[zn]];                \
+}
+
+
+/*-- BZ2_decompress: resumable parse of the stream header and one block, driven by s->state. --*/
+Int32 BZ2_decompress ( DState* s )
+{
+   UChar      uc;
+   Int32      retVal;   /* result code; BZ_OK unless a RETURN(...) sets something else */
+   Int32      minLen, maxLen;
+   bz_stream* strm = s->strm;
+
+   /* locals that must persist across calls; mirrored in the s->save_* fields */
+   Int32  i;
+   Int32  j;
+   Int32  t;
+   Int32  alphaSize;
+   Int32  nGroups;
+   Int32  nSelectors;
+   Int32  EOB;
+   Int32  groupNo;
+   Int32  groupPos;
+   Int32  nextSym;
+   Int32  nblockMAX;
+   Int32  nblock;
+   Int32  es;
+   Int32  N;
+   Int32  curr;
+   Int32  zt;
+   Int32  zn; 
+   Int32  zvec;
+   Int32  zj;
+   Int32  gSel;
+   Int32  gMinlen;
+   Int32* gLimit;
+   Int32* gBase;
+   Int32* gPerm;
+
+   if (s->state == BZ_X_MAGIC_1) {   /* about to read the first header byte */
+      /*initialise the save area*/
+      s->save_i           = 0;
+      s->save_j           = 0;
+      s->save_t           = 0;
+      s->save_alphaSize   = 0;
+      s->save_nGroups     = 0;
+      s->save_nSelectors  = 0;
+      s->save_EOB         = 0;
+      s->save_groupNo     = 0;
+      s->save_groupPos    = 0;
+      s->save_nextSym     = 0;
+      s->save_nblockMAX   = 0;
+      s->save_nblock      = 0;
+      s->save_es          = 0;
+      s->save_N           = 0;
+      s->save_curr        = 0;
+      s->save_zt          = 0;
+      s->save_zn          = 0;
+      s->save_zvec        = 0;
+      s->save_zj          = 0;
+      s->save_gSel        = 0;
+      s->save_gMinlen     = 0;
+      s->save_gLimit      = NULL;
+      s->save_gBase       = NULL;
+      s->save_gPerm       = NULL;
+   }
+
+   /*restore from the save area*/
+   i           = s->save_i;
+   j           = s->save_j;
+   t           = s->save_t;
+   alphaSize   = s->save_alphaSize;
+   nGroups     = s->save_nGroups;
+   nSelectors  = s->save_nSelectors;
+   EOB         = s->save_EOB;
+   groupNo     = s->save_groupNo;
+   groupPos    = s->save_groupPos;
+   nextSym     = s->save_nextSym;
+   nblockMAX   = s->save_nblockMAX;
+   nblock      = s->save_nblock;
+   es          = s->save_es;
+   N           = s->save_N;
+   curr        = s->save_curr;
+   zt          = s->save_zt;
+   zn          = s->save_zn; 
+   zvec        = s->save_zvec;
+   zj          = s->save_zj;
+   gSel        = s->save_gSel;
+   gMinlen     = s->save_gMinlen;
+   gLimit      = s->save_gLimit;
+   gBase       = s->save_gBase;
+   gPerm       = s->save_gPerm;
+
+   retVal = BZ_OK;
+
+   switch (s->state) {   /* jumps to the 'case' label emitted by the matching GET_* macro */
+
+      GET_UCHAR(BZ_X_MAGIC_1, uc);   /* three magic bytes: BZ_HDR_B, BZ_HDR_Z, BZ_HDR_h */
+      if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
+
+      GET_UCHAR(BZ_X_MAGIC_2, uc);
+      if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
+
+      GET_UCHAR(BZ_X_MAGIC_3, uc)
+      if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
+
+      GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)   /* block-size byte: BZ_HDR_0+1 .. BZ_HDR_0+9 */
+      if (s->blockSize100k < (BZ_HDR_0 + 1) || 
+          s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
+      s->blockSize100k -= BZ_HDR_0;   /* now in the range 1..9 */
+
+      if (s->smallDecompress) {   /* ll16: 2 bytes per entry; ll4: one byte per two entries */
+         s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
+         s->ll4  = BZALLOC( 
+                      ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar) 
+                   );
+         if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
+      } else {   /* tt: one Int32-sized entry per block position */
+         s->tt  = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
+         if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
+      }
+
+      GET_UCHAR(BZ_X_BLKHDR_1, uc);
+
+      if (uc == 0x17) goto endhdr_2;   /* first byte of the end-of-stream marker */
+      if (uc != 0x31) RETURN(BZ_DATA_ERROR);   /* block marker is 0x31 41 59 26 53 59 */
+      GET_UCHAR(BZ_X_BLKHDR_2, uc);
+      if (uc != 0x41) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_3, uc);
+      if (uc != 0x59) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_4, uc);
+      if (uc != 0x26) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_5, uc);
+      if (uc != 0x53) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_6, uc);
+      if (uc != 0x59) RETURN(BZ_DATA_ERROR);
+
+      s->currBlockNo++;
+      if (s->verbosity >= 2)
+         VPrintf1 ( "\n    [%d: huff+mtf ", s->currBlockNo );
+ 
+      s->storedBlockCRC = 0;   /* assembled from 4 bytes, most significant first */
+      GET_UCHAR(BZ_X_BCRC_1, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_BCRC_2, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_BCRC_3, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_BCRC_4, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+
+      GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
+
+      s->origPtr = 0;   /* assembled from 3 bytes, most significant first */
+      GET_UCHAR(BZ_X_ORIGPTR_1, uc);
+      s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+      GET_UCHAR(BZ_X_ORIGPTR_2, uc);
+      s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+      GET_UCHAR(BZ_X_ORIGPTR_3, uc);
+      s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+
+      if (s->origPtr < 0)
+         RETURN(BZ_DATA_ERROR);
+      if (s->origPtr > 10 + 100000*s->blockSize100k) 
+         RETURN(BZ_DATA_ERROR);
+
+      /*--- Receive the mapping table ---*/
+      for (i = 0; i < 16; i++) {   /* one flag per group of 16 byte values */
+         GET_BIT(BZ_X_MAPPING_1, uc);
+         if (uc == 1) 
+            s->inUse16[i] = True; else 
+            s->inUse16[i] = False;
+      }
+
+      for (i = 0; i < 256; i++) s->inUse[i] = False;
+
+      for (i = 0; i < 16; i++)
+         if (s->inUse16[i])   /* only flagged groups carry their 16 per-value bits */
+            for (j = 0; j < 16; j++) {
+               GET_BIT(BZ_X_MAPPING_2, uc);
+               if (uc == 1) s->inUse[i * 16 + j] = True;
+            }
+      makeMaps_d ( s );
+      if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
+      alphaSize = s->nInUse+2;   /* symbols 0 .. EOB, where EOB is set to nInUse+1 below */
+
+      /*--- Now the selectors ---*/
+      GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
+      if (nGroups < 2 || nGroups > BZ_N_GROUPS) RETURN(BZ_DATA_ERROR);
+      GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
+      if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
+      for (i = 0; i < nSelectors; i++) {
+         j = 0;   /* j counts the 1 bits read before the terminating 0 bit */
+         while (True) {
+            GET_BIT(BZ_X_SELECTOR_3, uc);
+            if (uc == 0) break;
+            j++;
+            if (j >= nGroups) RETURN(BZ_DATA_ERROR);
+         }
+         /* Having more than BZ_MAX_SELECTORS doesn't make much sense
+            since they will never be used, but some implementations might
+            "round up" the number of selectors, so just ignore those. */
+         if (i < BZ_MAX_SELECTORS)
+           s->selectorMtf[i] = j;
+      }
+      if (nSelectors > BZ_MAX_SELECTORS)
+        nSelectors = BZ_MAX_SELECTORS;
+
+      /*--- Undo the MTF values for the selectors. ---*/
+      {
+         UChar pos[BZ_N_GROUPS], tmp, v;
+         for (v = 0; v < nGroups; v++) pos[v] = v;
+   
+         for (i = 0; i < nSelectors; i++) {
+            v = s->selectorMtf[i];
+            tmp = pos[v];
+            while (v > 0) { pos[v] = pos[v-1]; v--; }   /* shift earlier entries back one slot */
+            pos[0] = tmp;                                /* selected entry moves to the front */
+            s->selector[i] = tmp;
+         }
+      }
+
+      /*--- Now the coding tables ---*/
+      for (t = 0; t < nGroups; t++) {
+         GET_BITS(BZ_X_CODING_1, curr, 5);   /* starting code length for table t */
+         for (i = 0; i < alphaSize; i++) {
+            while (True) {
+               if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
+               GET_BIT(BZ_X_CODING_2, uc);
+               if (uc == 0) break;               /* 0: curr is this symbol's length */
+               GET_BIT(BZ_X_CODING_3, uc);
+               if (uc == 0) curr++; else curr--; /* 1 then 0: increment; 1 then 1: decrement */
+            }
+            s->len[t][i] = curr;
+         }
+      }
+
+      /*--- Create the Huffman decoding tables ---*/
+      for (t = 0; t < nGroups; t++) {
+         minLen = 32;
+         maxLen = 0;
+         for (i = 0; i < alphaSize; i++) {
+            if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
+            if (s->len[t][i] < minLen) minLen = s->len[t][i];
+         }
+         BZ2_hbCreateDecodeTables ( 
+            &(s->limit[t][0]), 
+            &(s->base[t][0]), 
+            &(s->perm[t][0]), 
+            &(s->len[t][0]),
+            minLen, maxLen, alphaSize
+         );
+         s->minLens[t] = minLen;
+      }
+
+      /*--- Now the MTF values ---*/
+
+      EOB      = s->nInUse+1;
+      nblockMAX = 100000 * s->blockSize100k;
+      groupNo  = -1;   /* with groupPos == 0, the first GET_MTF_VAL advances to group 0 */
+      groupPos = 0;
+
+      for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
+
+      /*-- MTF init --*/
+      {
+         Int32 ii, jj, kk;
+         kk = MTFA_SIZE-1;   /* fill mtfa[] from its top end downwards */
+         for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
+            for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
+               s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
+               kk--;
+            }
+            s->mtfbase[ii] = kk + 1;   /* start index of sub-list ii within mtfa[] */
+         }
+      }
+      /*-- end MTF init --*/
+
+      nblock = 0;
+      GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
+
+      while (True) {
+
+         if (nextSym == EOB) break;
+
+         if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
+
+            es = -1;
+            N = 1;   /* weight of the current RUNA/RUNB digit; doubles each iteration */
+            do {
+               /* Check that N doesn't get too big, so that es doesn't
+                  go negative.  The maximum value that can be
+                  RUNA/RUNB encoded is equal to the block size (post
+                  the initial RLE), viz, 900k, so bounding N at 2
+                  million should guard against overflow without
+                  rejecting any legitimate inputs. */
+               if (N >= 2*1024*1024) RETURN(BZ_DATA_ERROR);
+               if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
+               if (nextSym == BZ_RUNB) es = es + (1+1) * N;
+               N = N * 2;
+               GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
+            }
+               while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
+
+            es++;   /* es is now the number of copies to emit */
+            uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];   /* byte at the front of the MTF list */
+            s->unzftab[uc] += es;
+
+            if (s->smallDecompress)
+               while (es > 0) {
+                  if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+                  s->ll16[nblock] = (UInt16)uc;
+                  nblock++;
+                  es--;
+               }
+            else
+               while (es > 0) {
+                  if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+                  s->tt[nblock] = (UInt32)uc;
+                  nblock++;
+                  es--;
+               };
+
+            continue;
+
+         } else {
+
+            if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+
+            /*-- uc = MTF ( nextSym-1 ) --*/
+            {
+               Int32 ii, jj, kk, pp, lno, off;
+               UInt32 nn;
+               nn = (UInt32)(nextSym - 1);   /* position within the MTF list */
+
+               if (nn < MTFL_SIZE) {
+                  /* avoid general-case expense */
+                  pp = s->mtfbase[0];
+                  uc = s->mtfa[pp+nn];
+                  while (nn > 3) {   /* shift four entries per iteration */
+                     Int32 z = pp+nn;
+                     s->mtfa[(z)  ] = s->mtfa[(z)-1];
+                     s->mtfa[(z)-1] = s->mtfa[(z)-2];
+                     s->mtfa[(z)-2] = s->mtfa[(z)-3];
+                     s->mtfa[(z)-3] = s->mtfa[(z)-4];
+                     nn -= 4;
+                  }
+                  while (nn > 0) { 
+                     s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--; 
+                  };
+                  s->mtfa[pp] = uc;
+               } else { 
+                  /* general case */
+                  lno = nn / MTFL_SIZE;   /* which sub-list */
+                  off = nn % MTFL_SIZE;   /* offset inside that sub-list */
+                  pp = s->mtfbase[lno] + off;
+                  uc = s->mtfa[pp];
+                  while (pp > s->mtfbase[lno]) { 
+                     s->mtfa[pp] = s->mtfa[pp-1]; pp--; 
+                  };
+                  s->mtfbase[lno]++;
+                  while (lno > 0) {   /* each earlier sub-list hands its last entry to the next one */
+                     s->mtfbase[lno]--;
+                     s->mtfa[s->mtfbase[lno]] 
+                        = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
+                     lno--;
+                  }
+                  s->mtfbase[0]--;
+                  s->mtfa[s->mtfbase[0]] = uc;
+                  if (s->mtfbase[0] == 0) {   /* reached the bottom of mtfa[]: repack at the top */
+                     kk = MTFA_SIZE-1;
+                     for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
+                        for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
+                           s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
+                           kk--;
+                        }
+                        s->mtfbase[ii] = kk + 1;
+                     }
+                  }
+               }
+            }
+            /*-- end uc = MTF ( nextSym-1 ) --*/
+
+            s->unzftab[s->seqToUnseq[uc]]++;
+            if (s->smallDecompress)
+               s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
+               s->tt[nblock]   = (UInt32)(s->seqToUnseq[uc]);
+            nblock++;
+
+            GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
+            continue;
+         }
+      }
+
+      /* Now we know what nblock is, we can do a better sanity
+         check on s->origPtr.
+      */
+      if (s->origPtr < 0 || s->origPtr >= nblock)
+         RETURN(BZ_DATA_ERROR);
+
+      /*-- Set up cftab to facilitate generation of T^(-1) --*/
+      /* Check: unzftab entries in range. */
+      for (i = 0; i <= 255; i++) {
+         if (s->unzftab[i] < 0 || s->unzftab[i] > nblock)
+            RETURN(BZ_DATA_ERROR);
+      }
+      /* Actually generate cftab. */
+      s->cftab[0] = 0;
+      for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
+      for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];   /* running (prefix) sum */
+      /* Check: cftab entries in range. */
+      for (i = 0; i <= 256; i++) {
+         if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
+            /* s->cftab[i] can legitimately be == nblock */
+            RETURN(BZ_DATA_ERROR);
+         }
+      }
+      /* Check: cftab entries non-descending. */
+      for (i = 1; i <= 256; i++) {
+         if (s->cftab[i-1] > s->cftab[i]) {
+            RETURN(BZ_DATA_ERROR);
+         }
+      }
+
+      s->state_out_len = 0;
+      s->state_out_ch  = 0;
+      BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
+      s->state = BZ_X_OUTPUT;   /* state recorded before the final RETURN(BZ_OK) of this block */
+      if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
+
+      if (s->smallDecompress) {
+
+         /*-- Make a copy of cftab, used in generation of T --*/
+         for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
+
+         /*-- compute the T vector --*/
+         for (i = 0; i < nblock; i++) {
+            uc = (UChar)(s->ll16[i]);
+            SET_LL(i, s->cftabCopy[uc]);
+            s->cftabCopy[uc]++;
+         }
+
+         /*-- Compute T^(-1) by pointer reversal on T --*/
+         i = s->origPtr;
+         j = GET_LL(i);
+         do {
+            Int32 tmp = GET_LL(j);
+            SET_LL(j, i);
+            i = j;
+            j = tmp;
+         }
+            while (i != s->origPtr);
+
+         s->tPos = s->origPtr;
+         s->nblock_used = 0;
+         if (s->blockRandomised) {
+            BZ_RAND_INIT_MASK;
+            BZ_GET_SMALL(s->k0); s->nblock_used++;
+            BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 
+         } else {
+            BZ_GET_SMALL(s->k0); s->nblock_used++;
+         }
+
+      } else {
+
+         /*-- compute the T^(-1) vector --*/
+         for (i = 0; i < nblock; i++) {
+            uc = (UChar)(s->tt[i] & 0xff);
+            s->tt[s->cftab[uc]] |= (i << 8);   /* low 8 bits keep the byte; index i goes above them */
+            s->cftab[uc]++;
+         }
+
+         s->tPos = s->tt[s->origPtr] >> 8;
+         s->nblock_used = 0;
+         if (s->blockRandomised) {
+            BZ_RAND_INIT_MASK;
+            BZ_GET_FAST(s->k0); s->nblock_used++;
+            BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 
+         } else {
+            BZ_GET_FAST(s->k0); s->nblock_used++;
+         }
+
+      }
+
+      RETURN(BZ_OK);
+
+
+
+    endhdr_2:   /* rest of the end-of-stream marker: 0x72 45 38 50 90 */
+
+      GET_UCHAR(BZ_X_ENDHDR_2, uc);
+      if (uc != 0x72) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_3, uc);
+      if (uc != 0x45) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_4, uc);
+      if (uc != 0x38) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_5, uc);
+      if (uc != 0x50) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_6, uc);
+      if (uc != 0x90) RETURN(BZ_DATA_ERROR);
+
+      s->storedCombinedCRC = 0;   /* assembled from 4 bytes, most significant first */
+      GET_UCHAR(BZ_X_CCRC_1, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_CCRC_2, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_CCRC_3, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_CCRC_4, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+
+      s->state = BZ_X_IDLE;
+      RETURN(BZ_STREAM_END);
+
+      default: AssertH ( False, 4001 );   /* s->state matched none of the resume labels */
+   }
+
+   AssertH ( False, 4002 );   /* every path through the switch is expected to leave via RETURN */
+
+   save_state_and_return:
+
+   s->save_i           = i;
+   s->save_j           = j;
+   s->save_t           = t;
+   s->save_alphaSize   = alphaSize;
+   s->save_nGroups     = nGroups;
+   s->save_nSelectors  = nSelectors;
+   s->save_EOB         = EOB;
+   s->save_groupNo     = groupNo;
+   s->save_groupPos    = groupPos;
+   s->save_nextSym     = nextSym;
+   s->save_nblockMAX   = nblockMAX;
+   s->save_nblock      = nblock;
+   s->save_es          = es;
+   s->save_N           = N;
+   s->save_curr        = curr;
+   s->save_zt          = zt;
+   s->save_zn          = zn;
+   s->save_zvec        = zvec;
+   s->save_zj          = zj;
+   s->save_gSel        = gSel;
+   s->save_gMinlen     = gMinlen;
+   s->save_gLimit      = gLimit;
+   s->save_gBase       = gBase;
+   s->save_gPerm       = gPerm;
+
+   return retVal;   
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                      decompress.c ---*/
+/*-------------------------------------------------------------*/
diff --git a/libraries/bzip2/huffman.c b/libraries/bzip2/huffman.c
new file mode 100644
index 000000000..43a1899e4
--- /dev/null
+++ b/libraries/bzip2/huffman.c
@@ -0,0 +1,205 @@
+
+/*-------------------------------------------------------------*/
+/*--- Huffman coding low-level stuff                        ---*/
+/*---                                             huffman.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+   This file is part of bzip2/libbzip2, a program and library for
+   lossless, block-sorting data compression.
+
+   bzip2/libbzip2 version 1.0.8 of 13 July 2019
+   Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
+
+   Please read the WARNING, DISCLAIMER and PATENTS sections in the 
+   README file.
+
+   This program is released under the terms of the license contained
+   in the file LICENSE.
+   ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+/*-- A node value packs a weight in its upper 24 bits and a depth in its low 8 bits. --*/
+#define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
+#define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
+#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
+/*-- ADDWEIGHTS: sum of the two weights, combined with 1 + the larger of the two depths. --*/
+#define ADDWEIGHTS(zw1,zw2)                           \
+   (WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                    \
+   (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
+/*-- UPHEAP: move heap[z] toward the root while its weight is below its parent's (uses heap[], weight[]). --*/
+#define UPHEAP(z)                                     \
+{                                                     \
+   Int32 zz, tmp;                                     \
+   zz = z; tmp = heap[zz];                            \
+   while (weight[tmp] < weight[heap[zz >> 1]]) {      \
+      heap[zz] = heap[zz >> 1];                       \
+      zz >>= 1;                                       \
+   }                                                  \
+   heap[zz] = tmp;                                    \
+}
+/*-- DOWNHEAP: move heap[z] down past lighter children (uses heap[], weight[], nHeap). --*/
+#define DOWNHEAP(z)                                   \
+{                                                     \
+   Int32 zz, yy, tmp;                                 \
+   zz = z; tmp = heap[zz];                            \
+   while (True) {                                     \
+      yy = zz << 1;                                   \
+      if (yy > nHeap) break;                          \
+      if (yy < nHeap &&                               \
+          weight[heap[yy+1]] < weight[heap[yy]])      \
+         yy++;                                        \
+      if (weight[tmp] < weight[heap[yy]]) break;      \
+      heap[zz] = heap[yy];                            \
+      zz = yy;                                        \
+   }                                                  \
+   heap[zz] = tmp;                                    \
+}
+
+
+/*-- BZ2_hbMakeCodeLengths: compute a code length per symbol from freq[], none exceeding maxLen. --*/
+void BZ2_hbMakeCodeLengths ( UChar *len,         /* out: len[0 .. alphaSize-1] */
+                             Int32 *freq,        /* in: freq[0 .. alphaSize-1] */
+                             Int32 alphaSize,    /* number of symbols */
+                             Int32 maxLen )      /* largest code length accepted */
+{
+   /*--
+      Nodes and heap entries run from 1.  Entry 0
+      for both the heap and nodes is a sentinel.
+   --*/
+   Int32 nNodes, nHeap, n1, n2, i, j, k;
+   Bool  tooLong;
+
+   Int32 heap   [ BZ_MAX_ALPHA_SIZE + 2 ];
+   Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
+   Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; 
+
+   for (i = 0; i < alphaSize; i++)   /* a zero frequency is treated as 1; weight sits above the low 8 bits */
+      weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
+
+   while (True) {   /* repeat until no code is longer than maxLen */
+
+      nNodes = alphaSize;
+      nHeap = 0;
+
+      heap[0] = 0;
+      weight[0] = 0;
+      parent[0] = -2;
+
+      for (i = 1; i <= alphaSize; i++) {   /* insert every leaf into the heap */
+         parent[i] = -1;
+         nHeap++;
+         heap[nHeap] = i;
+         UPHEAP(nHeap);
+      }
+
+      AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
+   
+      while (nHeap > 1) {   /* merge the two lightest nodes under a new parent */
+         n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
+         n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
+         nNodes++;
+         parent[n1] = parent[n2] = nNodes;
+         weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
+         parent[nNodes] = -1;
+         nHeap++;
+         heap[nHeap] = nNodes;
+         UPHEAP(nHeap);
+      }
+
+      AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
+
+      tooLong = False;
+      for (i = 1; i <= alphaSize; i++) {
+         j = 0;
+         k = i;
+         while (parent[k] >= 0) { k = parent[k]; j++; }   /* count the parent links above leaf i */
+         len[i-1] = j;
+         if (j > maxLen) tooLong = True;
+      }
+      
+      if (! tooLong) break;
+
+      /* 17 Oct 04: keep-going condition for the following loop used
+         to be 'i < alphaSize', which missed the last element,
+         theoretically leading to the possibility of the compressor
+         looping.  However, this count-scaling step is only needed if
+         one of the generated Huffman code words is longer than
+         maxLen, which up to and including version 1.0.2 was 20 bits,
+         which is extremely unlikely.  In version 1.0.3 maxLen was
+         changed to 17 bits, which has minimal effect on compression
+         ratio, but does mean this scaling step is used from time to
+         time, enough to verify that it works.
+
+         This means that bzip2-1.0.3 and later will only produce
+         Huffman codes with a maximum length of 17 bits.  However, in
+         order to preserve backwards compatibility with bitstreams
+         produced by versions pre-1.0.3, the decompressor must still
+         handle lengths of up to 20. */
+
+      for (i = 1; i <= alphaSize; i++) {   /* roughly halve each leaf weight, keeping it >= 1 */
+         j = weight[i] >> 8;
+         j = 1 + (j / 2);
+         weight[i] = j << 8;
+      }
+   }
+}
+
+
+/*-- BZ2_hbAssignCodes: fill code[] with consecutive values, shortest lengths first. --*/
+void BZ2_hbAssignCodes ( Int32 *code,       /* out: code[i] for each symbol i */
+                         UChar *length,     /* in: code length of each symbol */
+                         Int32 minLen,      /* first length processed */
+                         Int32 maxLen,      /* last length processed */
+                         Int32 alphaSize )  /* number of symbols */
+{
+   Int32 n, vec, i;
+
+   vec = 0;   /* next code value to hand out */
+   for (n = minLen; n <= maxLen; n++) {
+      for (i = 0; i < alphaSize; i++)   /* within one length, symbols are taken in index order */
+         if (length[i] == n) { code[i] = vec; vec++; };
+      vec <<= 1;   /* shifted left before moving on to the next length */
+   }
+}
+
+
+/*-- BZ2_hbCreateDecodeTables: build limit[], base[] and perm[] from per-symbol code lengths. --*/
+void BZ2_hbCreateDecodeTables ( Int32 *limit,     /* out: indexed by code length */
+                                Int32 *base,      /* out: indexed by code length */
+                                Int32 *perm,      /* out: symbols ordered by (length, index) */
+                                UChar *length,    /* in: code length of each symbol */
+                                Int32 minLen,
+                                Int32 maxLen,
+                                Int32 alphaSize )
+{
+   Int32 pp, i, j, vec;
+
+   pp = 0;
+   for (i = minLen; i <= maxLen; i++)   /* shorter lengths first; index order within a length */
+      for (j = 0; j < alphaSize; j++)
+         if (length[j] == i) { perm[pp] = j; pp++; };
+
+   for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
+   for (i = 0; i < alphaSize; i++) base[length[i]+1]++;   /* count of length L is stored at L+1 */
+
+   for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];   /* base[i] = symbols with length < i */
+
+   for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
+   vec = 0;
+
+   for (i = minLen; i <= maxLen; i++) {
+      vec += (base[i+1] - base[i]);   /* add the number of codes of length i */
+      limit[i] = vec-1;               /* highest code value reached at length i */
+      vec <<= 1;
+   }
+   for (i = minLen + 1; i <= maxLen; i++)   /* rewrite base[i] as the offset subtracted from a code to index perm[] */
+      base[i] = ((limit[i-1] + 1) << 1) - base[i];
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                         huffman.c ---*/
+/*-------------------------------------------------------------*/
diff --git a/libraries/bzip2/randtable.c b/libraries/bzip2/randtable.c
new file mode 100644
index 000000000..bdc6d4a4c
--- /dev/null
+++ b/libraries/bzip2/randtable.c
@@ -0,0 +1,84 @@
+
+/*-------------------------------------------------------------*/
+/*--- Table for randomising repetitive blocks               ---*/
+/*---                                           randtable.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+   This file is part of bzip2/libbzip2, a program and library for
+   lossless, block-sorting data compression.
+
+   bzip2/libbzip2 version 1.0.8 of 13 July 2019
+   Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
+
+   Please read the WARNING, DISCLAIMER and PATENTS sections in the 
+   README file.
+
+   This program is released under the terms of the license contained
+   in the file LICENSE.
+   ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+
+/*-- BZ2_rNums: the 512 constants described by this file's header as the block-randomising table. --*/
+Int32 BZ2_rNums[512] = { 
+   619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 
+   985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 
+   733, 859, 335, 708, 621, 574, 73, 654, 730, 472, 
+   419, 436, 278, 496, 867, 210, 399, 680, 480, 51, 
+   878, 465, 811, 169, 869, 675, 611, 697, 867, 561, 
+   862, 687, 507, 283, 482, 129, 807, 591, 733, 623, 
+   150, 238, 59, 379, 684, 877, 625, 169, 643, 105, 
+   170, 607, 520, 932, 727, 476, 693, 425, 174, 647, 
+   73, 122, 335, 530, 442, 853, 695, 249, 445, 515, 
+   909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 
+   641, 801, 220, 162, 819, 984, 589, 513, 495, 799, 
+   161, 604, 958, 533, 221, 400, 386, 867, 600, 782, 
+   382, 596, 414, 171, 516, 375, 682, 485, 911, 276, 
+   98, 553, 163, 354, 666, 933, 424, 341, 533, 870, 
+   227, 730, 475, 186, 263, 647, 537, 686, 600, 224, 
+   469, 68, 770, 919, 190, 373, 294, 822, 808, 206, 
+   184, 943, 795, 384, 383, 461, 404, 758, 839, 887, 
+   715, 67, 618, 276, 204, 918, 873, 777, 604, 560, 
+   951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 
+   652, 934, 970, 447, 318, 353, 859, 672, 112, 785, 
+   645, 863, 803, 350, 139, 93, 354, 99, 820, 908, 
+   609, 772, 154, 274, 580, 184, 79, 626, 630, 742, 
+   653, 282, 762, 623, 680, 81, 927, 626, 789, 125, 
+   411, 521, 938, 300, 821, 78, 343, 175, 128, 250, 
+   170, 774, 972, 275, 999, 639, 495, 78, 352, 126, 
+   857, 956, 358, 619, 580, 124, 737, 594, 701, 612, 
+   669, 112, 134, 694, 363, 992, 809, 743, 168, 974, 
+   944, 375, 748, 52, 600, 747, 642, 182, 862, 81, 
+   344, 805, 988, 739, 511, 655, 814, 334, 249, 515, 
+   897, 955, 664, 981, 649, 113, 974, 459, 893, 228, 
+   433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 
+   686, 754, 806, 760, 493, 403, 415, 394, 687, 700, 
+   946, 670, 656, 610, 738, 392, 760, 799, 887, 653, 
+   978, 321, 576, 617, 626, 502, 894, 679, 243, 440, 
+   680, 879, 194, 572, 640, 724, 926, 56, 204, 700, 
+   707, 151, 457, 449, 797, 195, 791, 558, 945, 679, 
+   297, 59, 87, 824, 713, 663, 412, 693, 342, 606, 
+   134, 108, 571, 364, 631, 212, 174, 643, 304, 329, 
+   343, 97, 430, 751, 497, 314, 983, 374, 822, 928, 
+   140, 206, 73, 263, 980, 736, 876, 478, 430, 305, 
+   170, 514, 364, 692, 829, 82, 855, 953, 676, 246, 
+   369, 970, 294, 750, 807, 827, 150, 790, 288, 923, 
+   804, 378, 215, 828, 592, 281, 565, 555, 710, 82, 
+   896, 831, 547, 261, 524, 462, 293, 465, 502, 56, 
+   661, 821, 976, 991, 658, 869, 905, 758, 745, 193, 
+   768, 550, 608, 933, 378, 286, 215, 979, 792, 961, 
+   61, 688, 793, 644, 986, 403, 106, 366, 905, 644, 
+   372, 567, 466, 434, 645, 210, 389, 550, 919, 135, 
+   780, 773, 635, 389, 707, 100, 626, 958, 165, 504, 
+   920, 176, 193, 713, 857, 265, 203, 50, 668, 108, 
+   645, 990, 626, 197, 510, 357, 358, 850, 858, 364, 
+   936, 638
+};
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                       randtable.c ---*/
+/*-------------------------------------------------------------*/
diff --git a/libraries/dumb/CMakeLists.txt b/libraries/dumb/CMakeLists.txt
new file mode 100644
index 000000000..904ca75c3
--- /dev/null
+++ b/libraries/dumb/CMakeLists.txt
@@ -0,0 +1,121 @@
+cmake_minimum_required( VERSION 2.8.7 )
+
+make_release_only()
+
+include( CheckFunctionExists )
+include( CheckCXXCompilerFlag )
+
+set( CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -D_DEBUG -DDEBUGMODE=1" )
+
+if( ZD_CMAKE_COMPILER_IS_GNUC_COMPATIBLE )
+	set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-pointer-sign -Wno-uninitialized" )
+	if( CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER "4.5" )
+		set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-but-set-variable" )
+	endif()
+endif()
+
+# Enable fast flag for dumb
+set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ZD_FASTMATH_FLAG}" )
+
+CHECK_FUNCTION_EXISTS( itoa ITOA_EXISTS )
+if( NOT ITOA_EXISTS )
+	add_definitions( -DNEED_ITOA=1 )
+endif()
+
+include_directories( include )
+
+add_library( dumb STATIC
+    src/core/unload.c
+    src/core/rendsig.c
+    src/core/rendduh.c
+    src/core/register.c
+    src/core/readduh.c
+    src/core/rawsig.c
+    src/core/makeduh.c
+    src/core/loadduh.c
+    src/core/dumbfile.c
+    src/core/duhtag.c
+    src/core/duhlen.c
+    src/core/atexit.c
+    src/helpers/stdfile.c
+    src/helpers/silence.c
+    src/helpers/sampbuf.c
+    src/helpers/riff.c
+    src/helpers/resample.c
+    src/helpers/memfile.c
+    src/helpers/clickrem.c
+    src/helpers/barray.c
+    src/it/xmeffect.c
+    src/it/readxm2.c
+    src/it/readxm.c
+    src/it/readstm2.c
+    src/it/readstm.c
+    src/it/reads3m2.c
+    src/it/reads3m.c
+    src/it/readriff.c
+    src/it/readptm.c
+    src/it/readpsm.c
+    src/it/readoldpsm.c
+    src/it/readokt2.c
+    src/it/readokt.c
+    src/it/readmtm.c
+    src/it/readmod2.c
+    src/it/readmod.c
+    src/it/readdsmf.c
+    src/it/readasy.c
+    src/it/readamf2.c
+    src/it/readamf.c
+    src/it/readam.c
+    src/it/read6692.c
+    src/it/read669.c
+    src/it/ptmeffect.c
+    src/it/loadxm2.c
+    src/it/loadxm.c
+    src/it/loadstm2.c
+    src/it/loadstm.c
+    src/it/loads3m2.c
+    src/it/loads3m.c
+    src/it/loadriff2.c
+    src/it/loadriff.c
+    src/it/loadptm2.c
+    src/it/loadptm.c
+    src/it/loadpsm2.c
+    src/it/loadpsm.c
+    src/it/loadoldpsm2.c
+    src/it/loadoldpsm.c
+    src/it/loadokt2.c
+    src/it/loadokt.c
+    src/it/loadmtm2.c
+    src/it/loadmtm.c
+    src/it/loadmod2.c
+    src/it/loadmod.c
+    src/it/loadasy2.c
+    src/it/loadasy.c
+    src/it/loadamf2.c
+    src/it/loadamf.c
+    src/it/load6692.c
+    src/it/load669.c
+    src/it/itunload.c
+    src/it/itrender.c
+    src/it/itread2.c
+    src/it/itread.c
+    src/it/itorder.c
+    src/it/itmisc.c
+    src/it/itload2.c
+    src/it/itload.c
+    src/it/readany.c
+    src/it/loadany2.c
+    src/it/loadany.c
+    src/it/readany2.c
+    src/helpers/resampler.c
+    src/helpers/lpc.c
+)
+target_link_libraries( dumb ) # no libraries are listed, so this adds no link dependencies
+
+if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
+	CHECK_CXX_COMPILER_FLAG( -msse DUMB_CAN_USE_SSE ) # NOTE(review): this probes the C++ compiler, yet the flag is applied to a C source below
+
+	if( DUMB_CAN_USE_SSE )
+		set_source_files_properties( src/helpers/resampler.c PROPERTIES COMPILE_FLAGS -msse ) # -msse is added for resampler.c only, not for the whole target
+	endif()
+endif()
diff --git a/libraries/dumb/cmake/CMakeLists.txt b/libraries/dumb/cmake/CMakeLists.txt
new file mode 100644
index 000000000..6cafa7219
--- /dev/null
+++ b/libraries/dumb/cmake/CMakeLists.txt
@@ -0,0 +1,118 @@
+cmake_minimum_required(VERSION 2.8.7)
+project(libdumb C)
+
+set(CMAKE_C_FLAGS "-Wall -DDUMB_DECLARE_DEPRECATED -D_USE_SSE -msse -Wno-unused-variable -Wno-unused-but-set-variable") # NOTE(review): this and the per-configuration variables below replace, rather than extend, any value already set; -msse is unconditional here
+set(CMAKE_C_FLAGS_DEBUG "-ggdb -DDEBUGMODE=1 -D_DEBUG")
+set(CMAKE_C_FLAGS_RELEASE "-ffast-math -O2 -DNDEBUG")
+set(CMAKE_C_FLAGS_RELWITHDEBINFO "-ffast-math -g -O2 -DNDEBUG")
+set(CMAKE_C_FLAGS_MINSIZEREL "-ffast-math -Os -DNDEBUG")
+
+link_directories(${CMAKE_CURRENT_BINARY_DIR})
+include_directories(../include/)
+
+SET(SOURCES
+    ../src/core/unload.c
+    ../src/core/rendsig.c
+    ../src/core/rendduh.c
+    ../src/core/register.c
+    ../src/core/readduh.c
+    ../src/core/rawsig.c
+    ../src/core/makeduh.c
+    ../src/core/loadduh.c
+    ../src/core/dumbfile.c
+    ../src/core/duhtag.c
+    ../src/core/duhlen.c
+    ../src/core/atexit.c
+    ../src/helpers/stdfile.c
+    ../src/helpers/silence.c
+    ../src/helpers/sampbuf.c
+    ../src/helpers/riff.c
+    ../src/helpers/resample.c
+    ../src/helpers/memfile.c
+    ../src/helpers/clickrem.c
+    ../src/helpers/barray.c
+    ../src/helpers/tarray.c
+    ../src/it/xmeffect.c
+    ../src/it/readxm2.c
+    ../src/it/readxm.c
+    ../src/it/readstm2.c
+    ../src/it/readstm.c
+    ../src/it/reads3m2.c
+    ../src/it/reads3m.c
+    ../src/it/readriff.c
+    ../src/it/readptm.c
+    ../src/it/readpsm.c
+    ../src/it/readoldpsm.c
+    ../src/it/readokt2.c
+    ../src/it/readokt.c
+    ../src/it/readmtm.c
+    ../src/it/readmod2.c
+    ../src/it/readmod.c
+    ../src/it/readdsmf.c
+    ../src/it/readasy.c
+    ../src/it/readamf2.c
+    ../src/it/readamf.c
+    ../src/it/readam.c
+    ../src/it/read6692.c
+    ../src/it/read669.c
+    ../src/it/ptmeffect.c
+    ../src/it/loadxm2.c
+    ../src/it/loadxm.c
+    ../src/it/loadstm2.c
+    ../src/it/loadstm.c
+    ../src/it/loads3m2.c
+    ../src/it/loads3m.c
+    ../src/it/loadriff2.c
+    ../src/it/loadriff.c
+    ../src/it/loadptm2.c
+    ../src/it/loadptm.c
+    ../src/it/loadpsm2.c
+    ../src/it/loadpsm.c
+    ../src/it/loadoldpsm2.c
+    ../src/it/loadoldpsm.c
+    ../src/it/loadokt2.c
+    ../src/it/loadokt.c
+    ../src/it/loadmtm2.c
+    ../src/it/loadmtm.c
+    ../src/it/loadmod2.c
+    ../src/it/loadmod.c
+    ../src/it/loadasy2.c
+    ../src/it/loadasy.c
+    ../src/it/loadamf2.c
+    ../src/it/loadamf.c
+    ../src/it/load6692.c
+    ../src/it/load669.c
+    ../src/it/itunload.c
+    ../src/it/itrender.c
+    ../src/it/itread2.c
+    ../src/it/itread.c
+    ../src/it/itorder.c
+    ../src/it/itmisc.c
+    ../src/it/itload2.c
+    ../src/it/itload.c
+    ../src/it/readany.c
+    ../src/it/loadany2.c
+    ../src/it/loadany.c
+    ../src/it/readany2.c
+    ../src/helpers/resampler.c
+    ../src/helpers/lpc.c
+)
+
+set(INSTALL_HEADERS
+    ../include/dumb.h
+)
+
+add_library(dumb ${SOURCES})
+set_target_properties(dumb PROPERTIES DEBUG_POSTFIX d)
+
+# Make sure the dylib install name path is set on OSX so you can include dumb in app bundles
+IF(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+    set_target_properties(dumb PROPERTIES INSTALL_NAME_DIR ${CMAKE_INSTALL_PREFIX}/lib)
+ENDIF()
+
+INSTALL(FILES ${INSTALL_HEADERS} DESTINATION include/)
+INSTALL(TARGETS dumb
+    RUNTIME DESTINATION bin
+    LIBRARY DESTINATION lib
+    ARCHIVE DESTINATION lib
+)
diff --git a/libraries/dumb/cmake/readme.txt b/libraries/dumb/cmake/readme.txt
new file mode 100644
index 000000000..32897c797
--- /dev/null
+++ b/libraries/dumb/cmake/readme.txt
@@ -0,0 +1,30 @@
+How to build libdumb with CMake
+===============================
+
+A quick example
+---------------
+
+In the libdumb cmake directory (dumb/cmake/), run:
+```
+mkdir -p build
+cd build
+cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local -DBUILD_SHARED_LIBS:BOOL=ON ..
+make
+make install
+```
+
+Steps
+-----
+
+1. Create a new temporary build directory and cd into it.
+2. Run cmake, pointing it at the libdumb cmake directory (e.g. `cmake -DCMAKE_INSTALL_PREFIX=/install/dir -DBUILD_SHARED_LIBS:BOOL=OFF -DCMAKE_BUILD_TYPE=Release path/to/dumb/cmake/dir`).
+3. Run make (e.g. `make`, `mingw32-make`, or whichever make tool matches your generator).
+4. If needed, run `make install`.
+
+Flags
+-----
+
+* CMAKE_INSTALL_PREFIX sets the installation path prefix.
+* CMAKE_BUILD_TYPE sets the build type (e.g. Release, Debug, RelWithDebInfo, MinSizeRel). Debug libraries will be named libdumbd, release libraries libdumb.
+* BUILD_SHARED_LIBS selects whether cmake should build a dynamic or a static library (ON=shared, OFF=static).
+* You may also need to tell cmake what kind of makefiles to create with the "-G" flag. E.g. for MSYS one would say something like `cmake -G "MSYS Makefiles" .`.
diff --git a/libraries/dumb/include/dumb.h b/libraries/dumb/include/dumb.h
new file mode 100644
index 000000000..8ac820229
--- /dev/null
+++ b/libraries/dumb/include/dumb.h
@@ -0,0 +1,811 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * dumb.h - The user header file for DUMB.            / / \  \
+ *                                                   | <  /   \_
+ * Include this file in any of your files in         |  \/ /\   /
+ * which you wish to use the DUMB functions           \_  /  > /
+ * and variables.                                       | \ / /
+ *                                                      |  ' /
+ * Allegro users, you will probably want aldumb.h.       \__/
+ */
+
+#ifndef DUMB_H
+#define DUMB_H
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#if defined(_DEBUG) && defined(_MSC_VER)
+#ifndef _CRTDBG_MAP_ALLOC
+//#define _CRTDBG_MAP_ALLOC
+#endif
+#include <crtdbg.h>
+#endif
+
+#ifdef __cplusplus
+	extern "C" {
+#endif
+
+
+#define DUMB_MAJOR_VERSION    1
+#define DUMB_MINOR_VERSION    0
+#define DUMB_REVISION_VERSION 0
+
+#define DUMB_VERSION (DUMB_MAJOR_VERSION*10000 + DUMB_MINOR_VERSION*100 + DUMB_REVISION_VERSION)
+
+#define DUMB_VERSION_STR "1.0.0"
+
+#define DUMB_NAME "DUMB v" DUMB_VERSION_STR
+
+#define DUMB_YEAR  2015
+#define DUMB_MONTH 1
+#define DUMB_DAY   17
+
+#define DUMB_YEAR_STR2  "15"
+#define DUMB_YEAR_STR4  "2015"
+#define DUMB_MONTH_STR1 "1"
+#define DUMB_DAY_STR1   "17"
+
+#if DUMB_MONTH < 10
+#define DUMB_MONTH_STR2 "0" DUMB_MONTH_STR1
+#else
+#define DUMB_MONTH_STR2 DUMB_MONTH_STR1
+#endif
+
+#if DUMB_DAY < 10
+#define DUMB_DAY_STR2 "0" DUMB_DAY_STR1
+#else
+#define DUMB_DAY_STR2 DUMB_DAY_STR1
+#endif
+
+
+/* WARNING: The month and day were inadvertently swapped in the v0.8 release.
+ *          Please do not compare this constant against any date in 2002. In
+ *          any case, DUMB_VERSION is probably more useful for this purpose.
+ */
+#define DUMB_DATE (DUMB_YEAR*10000 + DUMB_MONTH*100 + DUMB_DAY)
+
+#define DUMB_DATE_STR DUMB_DAY_STR1 "." DUMB_MONTH_STR1 "." DUMB_YEAR_STR4
+
+
+#undef MIN   /* drop any earlier definitions so the versions below take effect */
+#undef MAX
+#undef MID
+
+#define MIN(x,y)   (((x) < (y)) ? (x) : (y))   /* smaller of x and y */
+#define MAX(x,y)   (((x) > (y)) ? (x) : (y))   /* larger of x and y */
+#define MID(x,y,z) MAX((x), MIN((y), (z)))     /* y limited to at most z, then raised to at least x */
+
+#undef ABS
+#define ABS(x) (((x) >= 0) ? (x) : (-(x)))     /* absolute value; like the macros above, an argument may be evaluated more than once, so avoid side effects */
+
+
+#ifdef DEBUGMODE
+
+#ifndef ASSERT
+#include <assert.h>
+#define ASSERT(n) assert(n)
+#endif
+#ifndef TRACE
+// it would be nice if this did actually trace ...
+#define TRACE 1 ? (void)0 : (void)printf
+#endif
+
+#else
+
+#ifndef ASSERT
+#define ASSERT(n)
+#endif
+#ifndef TRACE
+#define TRACE 1 ? (void)0 : (void)printf
+#endif
+
+#endif
+
+
+#define DUMB_ID(a,b,c,d) (((unsigned int)(a) << 24) | \
+                          ((unsigned int)(b) << 16) | \
+                          ((unsigned int)(c) <<  8) | \
+                          ((unsigned int)(d)      )) /* ORs the four arguments into one unsigned int: a shifted to bit 24, b to bit 16, c to bit 8, d unshifted */
+
+
+#ifdef __DOS__ /* NOTE(review): presumably because int may be narrower than 32 bits on DOS targets - confirm */
+typedef long int32;
+typedef unsigned long uint32;
+typedef signed long sint32;
+#else /* every other target: plain int is used, i.e. int is assumed to be 32 bits wide - TODO confirm per platform */
+typedef int int32;
+typedef unsigned int uint32;
+typedef signed int sint32;
+#endif
+
+#define CDECL /* empty by default; only the MSVC/Watcom branch below replaces it */
+#ifndef LONG_LONG /* a LONG_LONG defined before this header is kept as is - and in that case CDECL stays empty as well */
+#if defined __GNUC__ || defined __INTEL_COMPILER || defined __MWERKS__
+#define LONG_LONG long long
+#elif defined _MSC_VER || defined __WATCOMC__
+#define LONG_LONG __int64
+#undef CDECL
+#define CDECL __cdecl
+#elif defined __sgi
+#define LONG_LONG long long
+#else
+#error 64-bit integer type unknown
+#endif
+#endif
+
+#if __GNUC__ * 100 + __GNUC_MINOR__ >= 301 /* GCC 3.1+ */
+#ifndef DUMB_DECLARE_DEPRECATED
+#define DUMB_DECLARE_DEPRECATED /* forced on here, so the #ifdef DUMB_DECLARE_DEPRECATED sections of this header are always compiled in when this test passes */
+#endif
+#define DUMB_DEPRECATED __attribute__((__deprecated__)) /* appended to each deprecated declaration in this header */
+#else
+#define DUMB_DEPRECATED /* expands to nothing when the compiler test above fails */
+#endif
+
+#define DUMBEXPORT CDECL
+#define DUMBCALLBACK CDECL
+
+/* Basic Sample Type. Normal range is -0x800000 to 0x7FFFFF. */
+
+typedef int sample_t;
+
+
+/* Library Clean-up Management */
+
+int dumb_atexit(void (*proc)(void));
+
+void dumb_exit(void);
+
+
+/* File Input Functions */
+
+typedef struct DUMBFILE_SYSTEM /* callback table accepted by register_dumbfile_system() and dumbfile_open_ex() */
+{
+	void *(DUMBCALLBACK *open)(const char *filename); /* presumably returns the handle later passed as 'f' - confirm in dumbfile.c */
+	int (DUMBCALLBACK *skip)(void *f, long n);
+	int (DUMBCALLBACK *getc)(void *f);
+	int32 (DUMBCALLBACK *getnc)(char *ptr, int32 n, void *f); /* note: the handle is the last argument here, unlike the other callbacks */
+	void (DUMBCALLBACK *close)(void *f);
+    int (DUMBCALLBACK *seek)(void *f, long n); /* takes no origin argument, unlike dumbfile_seek() */
+    long (DUMBCALLBACK *get_size)(void *f);
+}
+DUMBFILE_SYSTEM;
+
+typedef struct DUMBFILE DUMBFILE;
+
+void DUMBEXPORT register_dumbfile_system(const DUMBFILE_SYSTEM *dfs);
+
+DUMBFILE *DUMBEXPORT dumbfile_open(const char *filename);
+DUMBFILE *DUMBEXPORT dumbfile_open_ex(void *file, const DUMBFILE_SYSTEM *dfs);
+
+int32 DUMBEXPORT dumbfile_pos(DUMBFILE *f);
+int DUMBEXPORT dumbfile_skip(DUMBFILE *f, long n);
+
+#define DFS_SEEK_SET 0
+#define DFS_SEEK_CUR 1
+#define DFS_SEEK_END 2
+
+int DUMBEXPORT dumbfile_seek(DUMBFILE *f, long n, int origin);
+
+int32 DUMBEXPORT dumbfile_get_size(DUMBFILE *f);
+
+int DUMBEXPORT dumbfile_getc(DUMBFILE *f);
+
+int DUMBEXPORT dumbfile_igetw(DUMBFILE *f);
+int DUMBEXPORT dumbfile_mgetw(DUMBFILE *f);
+
+int32 DUMBEXPORT dumbfile_igetl(DUMBFILE *f);
+int32 DUMBEXPORT dumbfile_mgetl(DUMBFILE *f);
+
+uint32 DUMBEXPORT dumbfile_cgetul(DUMBFILE *f);
+sint32 DUMBEXPORT dumbfile_cgetsl(DUMBFILE *f);
+
+int32 DUMBEXPORT dumbfile_getnc(char *ptr, int32 n, DUMBFILE *f);
+
+int DUMBEXPORT dumbfile_error(DUMBFILE *f);
+int DUMBEXPORT dumbfile_close(DUMBFILE *f);
+
+
+/* stdio File Input Module */
+
+void DUMBEXPORT dumb_register_stdfiles(void);
+
+DUMBFILE *DUMBEXPORT dumbfile_open_stdfile(FILE *p);
+
+
+/* Memory File Input Module */
+
+DUMBFILE *DUMBEXPORT dumbfile_open_memory(const char *data, int32 size);
+
+
+/* DUH Management */
+
+typedef struct DUH DUH;
+
+#define DUH_SIGNATURE DUMB_ID('D','U','H','!')
+
+void DUMBEXPORT unload_duh(DUH *duh);
+
+DUH *DUMBEXPORT load_duh(const char *filename);
+DUH *DUMBEXPORT read_duh(DUMBFILE *f);
+
+int32 DUMBEXPORT duh_get_length(DUH *duh);
+
+const char *DUMBEXPORT duh_get_tag(DUH *duh, const char *key);
+
+/* Signal Rendering Functions */
+
+typedef struct DUH_SIGRENDERER DUH_SIGRENDERER;
+
+DUH_SIGRENDERER *DUMBEXPORT duh_start_sigrenderer(DUH *duh, int sig, int n_channels, int32 pos);
+
+#ifdef DUMB_DECLARE_DEPRECATED
+typedef void (*DUH_SIGRENDERER_CALLBACK)(void *data, sample_t **samples, int n_channels, int32 length);
+/* This is deprecated, but is not marked as such because GCC tends to
+ * complain spuriously when the typedef is used later. See comments below.
+ */
+
+void duh_sigrenderer_set_callback(
+	DUH_SIGRENDERER *sigrenderer,
+	DUH_SIGRENDERER_CALLBACK callback, void *data
+) DUMB_DEPRECATED;
+/* The 'callback' argument's type has changed for const-correctness. See the
+ * DUH_SIGRENDERER_CALLBACK definition just above. Also note that the samples
+ * in the buffer are now 256 times as large; the normal range is -0x800000 to
+ * 0x7FFFFF. The function has been renamed partly because its functionality
+ * has changed slightly and partly so that its name is more meaningful. The
+ * new one is duh_sigrenderer_set_analyser_callback(), and the typedef for
+ * the function pointer has also changed, from DUH_SIGRENDERER_CALLBACK to
+ * DUH_SIGRENDERER_ANALYSER_CALLBACK. (If you wanted to use this callback to
+ * apply a DSP effect, don't worry; there is a better way of doing this. It
+ * is undocumented, so contact me and I shall try to help. Contact details
+ * are in readme.txt.)
+ */
+
+typedef void (*DUH_SIGRENDERER_ANALYSER_CALLBACK)(void *data, const sample_t *const *samples, int n_channels, int32 length);
+/* This is deprecated, but is not marked as such because GCC tends to
+ * complain spuriously when the typedef is used later. See comments below.
+ */
+
+void duh_sigrenderer_set_analyser_callback(
+	DUH_SIGRENDERER *sigrenderer,
+	DUH_SIGRENDERER_ANALYSER_CALLBACK callback, void *data
+) DUMB_DEPRECATED;
+/* This is deprecated because the meaning of the 'samples' parameter in the
+ * callback needed to change. For stereo applications, the array used to be
+ * indexed with samples[channel][pos]. It is now indexed with
+ * samples[0][pos*2+channel]. Mono sample data are still indexed with
+ * samples[0][pos]. The array is still 2D because samples will probably only
+ * ever be interleaved in twos. In order to fix your code, adapt it to the
+ * new sample layout and then call
+ * duh_sigrenderer_set_sample_analyser_callback below instead of this
+ * function.
+ */
+#endif
+
+typedef void (*DUH_SIGRENDERER_SAMPLE_ANALYSER_CALLBACK)(void *data, const sample_t *const *samples, int n_channels, int32 length);
+
+void duh_sigrenderer_set_sample_analyser_callback(
+	DUH_SIGRENDERER *sigrenderer,
+	DUH_SIGRENDERER_SAMPLE_ANALYSER_CALLBACK callback, void *data
+);
+
+int DUMBEXPORT duh_sigrenderer_get_n_channels(DUH_SIGRENDERER *sigrenderer);
+int32 DUMBEXPORT duh_sigrenderer_get_position(DUH_SIGRENDERER *sigrenderer);
+
+void DUMBEXPORT duh_sigrenderer_set_sigparam(DUH_SIGRENDERER *sigrenderer, unsigned char id, int32 value);
+
+#ifdef DUMB_DECLARE_DEPRECATED
+int32 duh_sigrenderer_get_samples(
+	DUH_SIGRENDERER *sigrenderer,
+	float volume, float delta,
+	int32 size, sample_t **samples
+) DUMB_DEPRECATED;
+/* The sample format has changed, so if you were using this function,
+ * you should switch to duh_sigrenderer_generate_samples() and change
+ * how you interpret the samples array. See the comments for
+ * duh_sigrenderer_set_analyser_callback().
+ */
+#endif
+
+int32 DUMBEXPORT duh_sigrenderer_generate_samples(
+	DUH_SIGRENDERER *sigrenderer,
+	double volume, double delta,
+	int32 size, sample_t **samples
+);
+
+void DUMBEXPORT duh_sigrenderer_get_current_sample(DUH_SIGRENDERER *sigrenderer, float volume, sample_t *samples);
+
+void DUMBEXPORT duh_end_sigrenderer(DUH_SIGRENDERER *sigrenderer);
+
+
+/* DUH Rendering Functions */
+
+int32 DUMBEXPORT duh_render(
+	DUH_SIGRENDERER *sigrenderer,
+	int bits, int unsign,
+	float volume, float delta,
+	int32 size, void *sptr
+);
+
+#ifdef DUMB_DECLARE_DEPRECATED
+
+int32 duh_render_signal(
+	DUH_SIGRENDERER *sigrenderer,
+	float volume, float delta,
+	int32 size, sample_t **samples
+) DUMB_DEPRECATED;
+/* Please use duh_sigrenderer_generate_samples(), and see the
+ * comments for the deprecated duh_sigrenderer_get_samples() too.
+ */
+
+typedef DUH_SIGRENDERER DUH_RENDERER DUMB_DEPRECATED;
+/* Please use DUH_SIGRENDERER instead of DUH_RENDERER. */
+
+DUH_SIGRENDERER *duh_start_renderer(DUH *duh, int n_channels, int32 pos) DUMB_DEPRECATED;
+/* Please use duh_start_sigrenderer() instead. Pass 0 for 'sig'. */
+
+int duh_renderer_get_n_channels(DUH_SIGRENDERER *dr) DUMB_DEPRECATED;
+int32 duh_renderer_get_position(DUH_SIGRENDERER *dr) DUMB_DEPRECATED;
+/* Please use the duh_sigrenderer_*() equivalents of these two functions. */
+
+void duh_end_renderer(DUH_SIGRENDERER *dr) DUMB_DEPRECATED;
+/* Please use duh_end_sigrenderer() instead. */
+
+DUH_SIGRENDERER *duh_renderer_encapsulate_sigrenderer(DUH_SIGRENDERER *sigrenderer) DUMB_DEPRECATED;
+DUH_SIGRENDERER *duh_renderer_get_sigrenderer(DUH_SIGRENDERER *dr) DUMB_DEPRECATED;
+DUH_SIGRENDERER *duh_renderer_decompose_to_sigrenderer(DUH_SIGRENDERER *dr) DUMB_DEPRECATED;
+/* These functions have become no-ops that just return the parameter.
+ * So, for instance, replace
+ *   duh_renderer_encapsulate_sigrenderer(my_sigrenderer)
+ * with
+ *   my_sigrenderer
+ */
+
+#endif
+
+
+/* Impulse Tracker Support */
+
+extern int dumb_it_max_to_mix;
+
+typedef struct DUMB_IT_SIGDATA DUMB_IT_SIGDATA;
+typedef struct DUMB_IT_SIGRENDERER DUMB_IT_SIGRENDERER;
+
+DUMB_IT_SIGDATA *DUMBEXPORT duh_get_it_sigdata(DUH *duh);
+DUH_SIGRENDERER *DUMBEXPORT duh_encapsulate_it_sigrenderer(DUMB_IT_SIGRENDERER *it_sigrenderer, int n_channels, int32 pos);
+DUMB_IT_SIGRENDERER *DUMBEXPORT duh_get_it_sigrenderer(DUH_SIGRENDERER *sigrenderer);
+
+int DUMBEXPORT dumb_it_trim_silent_patterns(DUH * duh);
+
+typedef int (*dumb_scan_callback)(void *, int, int32);
+int DUMBEXPORT dumb_it_scan_for_playable_orders(DUMB_IT_SIGDATA *sigdata, dumb_scan_callback callback, void * callback_data);
+
+DUH_SIGRENDERER *DUMBEXPORT dumb_it_start_at_order(DUH *duh, int n_channels, int startorder);
+
+enum /* presumably the ramp_style values accepted by dumb_it_set_ramp_style() - confirm */
+{
+    DUMB_IT_RAMP_NONE = 0,
+    DUMB_IT_RAMP_ONOFF_ONLY = 1,
+    DUMB_IT_RAMP_FULL = 2
+};
+        
+void DUMBEXPORT dumb_it_set_ramp_style(DUMB_IT_SIGRENDERER * sigrenderer, int ramp_style);
+        
+void DUMBEXPORT dumb_it_set_loop_callback(DUMB_IT_SIGRENDERER *sigrenderer, int (DUMBCALLBACK *callback)(void *data), void *data);
+void DUMBEXPORT dumb_it_set_xm_speed_zero_callback(DUMB_IT_SIGRENDERER *sigrenderer, int (DUMBCALLBACK *callback)(void *data), void *data);
+void DUMBEXPORT dumb_it_set_midi_callback(DUMB_IT_SIGRENDERER *sigrenderer, int (DUMBCALLBACK *callback)(void *data, int channel, unsigned char midi_byte), void *data);
+void DUMBEXPORT dumb_it_set_global_volume_zero_callback(DUMB_IT_SIGRENDERER *sigrenderer, int (DUMBCALLBACK *callback)(void *data), void *data);
+
+int DUMBCALLBACK dumb_it_callback_terminate(void *data);
+int DUMBCALLBACK dumb_it_callback_midi_block(void *data, int channel, unsigned char midi_byte);
+
+/* dumb_*_mod*: restrict_ is a bit mask: 1 = don't read 15-sample files, 2 = use the old pattern counting method */
+
+DUH *DUMBEXPORT dumb_load_it(const char *filename);
+DUH *DUMBEXPORT dumb_load_xm(const char *filename);
+DUH *DUMBEXPORT dumb_load_s3m(const char *filename);
+DUH *DUMBEXPORT dumb_load_stm(const char *filename);
+DUH *DUMBEXPORT dumb_load_mod(const char *filename, int restrict_);
+DUH *DUMBEXPORT dumb_load_ptm(const char *filename);
+DUH *DUMBEXPORT dumb_load_669(const char *filename);
+DUH *DUMBEXPORT dumb_load_psm(const char *filename, int subsong);
+DUH *DUMBEXPORT dumb_load_old_psm(const char * filename);
+DUH *DUMBEXPORT dumb_load_mtm(const char *filename);
+DUH *DUMBEXPORT dumb_load_riff(const char *filename);
+DUH *DUMBEXPORT dumb_load_asy(const char *filename);
+DUH *DUMBEXPORT dumb_load_amf(const char *filename);
+DUH *DUMBEXPORT dumb_load_okt(const char *filename);
+
+DUH *DUMBEXPORT dumb_read_it(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_xm(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_s3m(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_stm(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_mod(DUMBFILE *f, int restrict_);
+DUH *DUMBEXPORT dumb_read_ptm(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_669(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_psm(DUMBFILE *f, int subsong);
+DUH *DUMBEXPORT dumb_read_old_psm(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_mtm(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_riff(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_asy(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_amf(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_okt(DUMBFILE *f);
+
+DUH *DUMBEXPORT dumb_load_it_quick(const char *filename);
+DUH *DUMBEXPORT dumb_load_xm_quick(const char *filename);
+DUH *DUMBEXPORT dumb_load_s3m_quick(const char *filename);
+DUH *DUMBEXPORT dumb_load_stm_quick(const char *filename);
+DUH *DUMBEXPORT dumb_load_mod_quick(const char *filename, int restrict_);
+DUH *DUMBEXPORT dumb_load_ptm_quick(const char *filename);
+DUH *DUMBEXPORT dumb_load_669_quick(const char *filename);
+DUH *DUMBEXPORT dumb_load_psm_quick(const char *filename, int subsong);
+DUH *DUMBEXPORT dumb_load_old_psm_quick(const char * filename);
+DUH *DUMBEXPORT dumb_load_mtm_quick(const char *filename);
+DUH *DUMBEXPORT dumb_load_riff_quick(const char *filename);
+DUH *DUMBEXPORT dumb_load_asy_quick(const char *filename);
+DUH *DUMBEXPORT dumb_load_amf_quick(const char *filename);
+DUH *DUMBEXPORT dumb_load_okt_quick(const char *filename);
+
+DUH *DUMBEXPORT dumb_read_it_quick(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_xm_quick(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_s3m_quick(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_stm_quick(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_mod_quick(DUMBFILE *f, int restrict_);
+DUH *DUMBEXPORT dumb_read_ptm_quick(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_669_quick(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_psm_quick(DUMBFILE *f, int subsong);
+DUH *DUMBEXPORT dumb_read_old_psm_quick(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_mtm_quick(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_riff_quick(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_asy_quick(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_amf_quick(DUMBFILE *f);
+DUH *DUMBEXPORT dumb_read_okt_quick(DUMBFILE *f);
+
+DUH *DUMBEXPORT dumb_read_any_quick(DUMBFILE *f, int restrict_, int subsong);
+DUH *DUMBEXPORT dumb_read_any(DUMBFILE *f, int restrict_, int subsong);
+
+DUH *DUMBEXPORT dumb_load_any_quick(const char *filename, int restrict_, int subsong);
+DUH *DUMBEXPORT dumb_load_any(const char *filename, int restrict_, int subsong);
+
+int32 DUMBEXPORT dumb_it_build_checkpoints(DUMB_IT_SIGDATA *sigdata, int startorder);
+void DUMBEXPORT dumb_it_do_initial_runthrough(DUH *duh);
+
+int DUMBEXPORT dumb_get_psm_subsong_count(DUMBFILE *f);
+
+const unsigned char *DUMBEXPORT dumb_it_sd_get_song_message(DUMB_IT_SIGDATA *sd);
+
+int DUMBEXPORT dumb_it_sd_get_n_orders(DUMB_IT_SIGDATA *sd);
+int DUMBEXPORT dumb_it_sd_get_n_samples(DUMB_IT_SIGDATA *sd);
+int DUMBEXPORT dumb_it_sd_get_n_instruments(DUMB_IT_SIGDATA *sd);
+
+const unsigned char *DUMBEXPORT dumb_it_sd_get_sample_name(DUMB_IT_SIGDATA *sd, int i);
+const unsigned char *DUMBEXPORT dumb_it_sd_get_sample_filename(DUMB_IT_SIGDATA *sd, int i);
+const unsigned char *DUMBEXPORT dumb_it_sd_get_instrument_name(DUMB_IT_SIGDATA *sd, int i);
+const unsigned char *DUMBEXPORT dumb_it_sd_get_instrument_filename(DUMB_IT_SIGDATA *sd, int i);
+
+int DUMBEXPORT dumb_it_sd_get_initial_global_volume(DUMB_IT_SIGDATA *sd);
+void DUMBEXPORT dumb_it_sd_set_initial_global_volume(DUMB_IT_SIGDATA *sd, int gv);
+
+int DUMBEXPORT dumb_it_sd_get_mixing_volume(DUMB_IT_SIGDATA *sd);
+void DUMBEXPORT dumb_it_sd_set_mixing_volume(DUMB_IT_SIGDATA *sd, int mv);
+
+int DUMBEXPORT dumb_it_sd_get_initial_speed(DUMB_IT_SIGDATA *sd);
+void DUMBEXPORT dumb_it_sd_set_initial_speed(DUMB_IT_SIGDATA *sd, int speed);
+
+int DUMBEXPORT dumb_it_sd_get_initial_tempo(DUMB_IT_SIGDATA *sd);
+void DUMBEXPORT dumb_it_sd_set_initial_tempo(DUMB_IT_SIGDATA *sd, int tempo);
+
+int DUMBEXPORT dumb_it_sd_get_initial_channel_volume(DUMB_IT_SIGDATA *sd, int channel);
+void DUMBEXPORT dumb_it_sd_set_initial_channel_volume(DUMB_IT_SIGDATA *sd, int channel, int volume);
+
+int DUMBEXPORT dumb_it_sr_get_current_order(DUMB_IT_SIGRENDERER *sr);
+int DUMBEXPORT dumb_it_sr_get_current_row(DUMB_IT_SIGRENDERER *sr);
+
+int DUMBEXPORT dumb_it_sr_get_global_volume(DUMB_IT_SIGRENDERER *sr);
+void DUMBEXPORT dumb_it_sr_set_global_volume(DUMB_IT_SIGRENDERER *sr, int gv);
+
+int DUMBEXPORT dumb_it_sr_get_tempo(DUMB_IT_SIGRENDERER *sr);
+void DUMBEXPORT dumb_it_sr_set_tempo(DUMB_IT_SIGRENDERER *sr, int tempo);
+
+int DUMBEXPORT dumb_it_sr_get_speed(DUMB_IT_SIGRENDERER *sr);
+void DUMBEXPORT dumb_it_sr_set_speed(DUMB_IT_SIGRENDERER *sr, int speed);
+
+#define DUMB_IT_N_CHANNELS 64
+#define DUMB_IT_N_NNA_CHANNELS 192
+#define DUMB_IT_TOTAL_CHANNELS (DUMB_IT_N_CHANNELS + DUMB_IT_N_NNA_CHANNELS)
+
+/* Channels passed to any of these functions are 0-based */
+int DUMBEXPORT dumb_it_sr_get_channel_volume(DUMB_IT_SIGRENDERER *sr, int channel);
+void DUMBEXPORT dumb_it_sr_set_channel_volume(DUMB_IT_SIGRENDERER *sr, int channel, int volume);
+
+int DUMBEXPORT dumb_it_sr_get_channel_muted(DUMB_IT_SIGRENDERER *sr, int channel);
+void DUMBEXPORT dumb_it_sr_set_channel_muted(DUMB_IT_SIGRENDERER *sr, int channel, int muted);
+
+typedef struct DUMB_IT_CHANNEL_STATE DUMB_IT_CHANNEL_STATE;
+
+struct DUMB_IT_CHANNEL_STATE
+{
+	int channel; /* 0-based; meaningful for NNA channels */
+	int sample; /* 1-based; 0 if nothing playing, then other fields undef */
+	int freq; /* in Hz */
+	float volume; /* 1.0 maximum; affected by ALL factors, inc. mixing vol */
+	unsigned char pan; /* 0-64, 100 for surround */
+	signed char subpan; /* use (pan + subpan/256.0f) or ((pan<<8)+subpan) */
+	unsigned char filter_cutoff;    /* 0-127    cutoff=127 AND resonance=0 */
+	unsigned char filter_subcutoff; /* 0-255      -> no filters (subcutoff */
+	unsigned char filter_resonance; /* 0-127        always 0 in this case) */
+	/* subcutoff only changes from zero if filter envelopes are in use. The
+	 * calculation (filter_cutoff + filter_subcutoff/256.0f) gives a more
+	 * accurate filter cutoff measurement as a float. It would often be more
+	 * useful to use a scaled int such as ((cutoff<<8) + subcutoff).
+	 */
+};
+
+/* Values of 64 or more will access NNA channels here. */
+void DUMBEXPORT dumb_it_sr_get_channel_state(DUMB_IT_SIGRENDERER *sr, int channel, DUMB_IT_CHANNEL_STATE *state);
+
+
+/* Signal Design Helper Values */
+
+/* Use pow(DUMB_SEMITONE_BASE, n) to get the 'delta' value to transpose up by
+ * n semitones. To transpose down, use negative n.
+ */
+#define DUMB_SEMITONE_BASE 1.059463094359295309843105314939748495817
+
+/* Use pow(DUMB_QUARTERTONE_BASE, n) to get the 'delta' value to transpose up
+ * by n quartertones. To transpose down, use negative n.
+ */
+#define DUMB_QUARTERTONE_BASE 1.029302236643492074463779317738953977823
+
+/* Use pow(DUMB_PITCH_BASE, n) to get the 'delta' value to transpose up by n
+ * units. In this case, 256 units represent one semitone; 3072 units
+ * represent one octave. These units are used by the sequence signal (SEQU).
+ */
+#define DUMB_PITCH_BASE 1.000225659305069791926712241547647863626
+
+
+/* Signal Design Function Types */
+
+typedef void sigdata_t;
+typedef void sigrenderer_t;
+
+typedef sigdata_t *(*DUH_LOAD_SIGDATA)(DUH *duh, DUMBFILE *file);
+
+typedef sigrenderer_t *(*DUH_START_SIGRENDERER)(
+	DUH *duh,
+	sigdata_t *sigdata,
+	int n_channels,
+	int32 pos
+);
+
+typedef void (*DUH_SIGRENDERER_SET_SIGPARAM)(
+	sigrenderer_t *sigrenderer,
+	unsigned char id, int32 value
+);
+
+typedef int32 (*DUH_SIGRENDERER_GENERATE_SAMPLES)(
+	sigrenderer_t *sigrenderer,
+	double volume, double delta,
+	int32 size, sample_t **samples
+);
+
+typedef void (*DUH_SIGRENDERER_GET_CURRENT_SAMPLE)(
+	sigrenderer_t *sigrenderer,
+	double volume,
+	sample_t *samples
+);
+
+typedef void (*DUH_END_SIGRENDERER)(sigrenderer_t *sigrenderer);
+
+typedef void (*DUH_UNLOAD_SIGDATA)(sigdata_t *sigdata);
+
+
+/* Signal Design Function Registration */
+
+typedef struct DUH_SIGTYPE_DESC /* one signal type: a type code plus its callbacks; passed to dumb_register_sigtype() */
+{
+	int32 type; /* presumably a DUMB_ID() code matched against the 'type' argument of duh_get_raw_sigdata() - confirm */
+	DUH_LOAD_SIGDATA                   load_sigdata;
+	DUH_START_SIGRENDERER              start_sigrenderer;
+	DUH_SIGRENDERER_SET_SIGPARAM       sigrenderer_set_sigparam;
+	DUH_SIGRENDERER_GENERATE_SAMPLES   sigrenderer_generate_samples;
+	DUH_SIGRENDERER_GET_CURRENT_SAMPLE sigrenderer_get_current_sample; /* callback type takes a double volume, whereas duh_sigrenderer_get_current_sample() takes float */
+	DUH_END_SIGRENDERER                end_sigrenderer;
+	DUH_UNLOAD_SIGDATA                 unload_sigdata;
+}
+DUH_SIGTYPE_DESC;
+
+void DUMBEXPORT dumb_register_sigtype(DUH_SIGTYPE_DESC *desc);
+
+
+// Decide where to put these functions; new heading?
+
+sigdata_t *DUMBEXPORT duh_get_raw_sigdata(DUH *duh, int sig, int32 type);
+
+DUH_SIGRENDERER *DUMBEXPORT duh_encapsulate_raw_sigrenderer(sigrenderer_t *vsigrenderer, DUH_SIGTYPE_DESC *desc, int n_channels, int32 pos);
+sigrenderer_t *DUMBEXPORT duh_get_raw_sigrenderer(DUH_SIGRENDERER *sigrenderer, int32 type);
+
+int DUMBEXPORT duh_add_signal(DUH *duh, DUH_SIGTYPE_DESC *desc, sigdata_t *sigdata);
+
+
+/* Standard Signal Types */
+
+//void dumb_register_sigtype_sample(void);
+
+
+/* Sample Buffer Allocation Helpers */
+
+#ifdef DUMB_DECLARE_DEPRECATED
+sample_t **create_sample_buffer(int n_channels, int32 length) DUMB_DEPRECATED;
+/* DUMB has been changed to interleave stereo samples. Use
+ * allocate_sample_buffer() instead, and see the comments for
+ * duh_sigrenderer_set_analyser_callback().
+ */
+#endif
+sample_t **DUMBEXPORT allocate_sample_buffer(int n_channels, int32 length);
+void DUMBEXPORT destroy_sample_buffer(sample_t **samples);
+
+
+/* Silencing Helper */
+
+void DUMBEXPORT dumb_silence(sample_t *samples, int32 length);
+
+
+/* Click Removal Helpers */
+
+typedef struct DUMB_CLICK_REMOVER DUMB_CLICK_REMOVER;
+
+DUMB_CLICK_REMOVER *DUMBEXPORT dumb_create_click_remover(void);
+void DUMBEXPORT dumb_record_click(DUMB_CLICK_REMOVER *cr, int32 pos, sample_t step);
+void DUMBEXPORT dumb_remove_clicks(DUMB_CLICK_REMOVER *cr, sample_t *samples, int32 length, int step, double halflife);
+sample_t DUMBEXPORT dumb_click_remover_get_offset(DUMB_CLICK_REMOVER *cr);
+void DUMBEXPORT dumb_destroy_click_remover(DUMB_CLICK_REMOVER *cr);
+
+DUMB_CLICK_REMOVER **DUMBEXPORT dumb_create_click_remover_array(int n);
+void DUMBEXPORT dumb_record_click_array(int n, DUMB_CLICK_REMOVER **cr, int32 pos, sample_t *step);
+void DUMBEXPORT dumb_record_click_negative_array(int n, DUMB_CLICK_REMOVER **cr, int32 pos, sample_t *step);
+void DUMBEXPORT dumb_remove_clicks_array(int n, DUMB_CLICK_REMOVER **cr, sample_t **samples, int32 length, double halflife);
+void DUMBEXPORT dumb_click_remover_get_offset_array(int n, DUMB_CLICK_REMOVER **cr, sample_t *offset);
+void DUMBEXPORT dumb_destroy_click_remover_array(int n, DUMB_CLICK_REMOVER **cr);
+
+
+/* Resampling Helpers */
+
+#define DUMB_RQ_ALIASING 0
+#define DUMB_LQ_LINEAR   1
+#define DUMB_LQ_CUBIC    2
+
+#define DUMB_RQ_BLEP     3
+#define DUMB_RQ_LINEAR   4
+#define DUMB_RQ_BLAM     5
+#define DUMB_RQ_CUBIC    6
+#define DUMB_RQ_FIR      7
+#define DUMB_RQ_N_LEVELS 8
+
+/* Subtract this from a quality value above to convert it to resampler.c's quality */
+#define DUMB_RESAMPLER_BASE	2
+
+extern int dumb_resampling_quality; /* This specifies the default */
+void DUMBEXPORT dumb_it_set_resampling_quality(DUMB_IT_SIGRENDERER * sigrenderer, int quality); /* This overrides it */
+
+typedef struct DUMB_RESAMPLER DUMB_RESAMPLER;
+
+typedef struct DUMB_VOLUME_RAMP_INFO DUMB_VOLUME_RAMP_INFO;
+
+typedef void (*DUMB_RESAMPLE_PICKUP)(DUMB_RESAMPLER *resampler, void *data);
+
+struct DUMB_RESAMPLER
+{
+	void *src;
+	int32 pos;
+	int subpos;
+	int32 start, end;
+	int dir;
+	DUMB_RESAMPLE_PICKUP pickup;
+	void *pickup_data;
+	int quality;
+	/* Everything below this point is internal: do not use. */
+	union {
+		sample_t x24[3*2];
+		short x16[3*2];
+		signed char x8[3*2];
+	} x;
+	int overshot;
+    double fir_resampler_ratio;
+    void* fir_resampler[2];
+};
+
+struct DUMB_VOLUME_RAMP_INFO
+{
+	float volume;
+	float delta;
+	float target;
+	float mix;
+    unsigned char declick_stage;
+};
+
+void dumb_reset_resampler(DUMB_RESAMPLER *resampler, sample_t *src, int src_channels, int32 pos, int32 start, int32 end, int quality);
+DUMB_RESAMPLER *dumb_start_resampler(sample_t *src, int src_channels, int32 pos, int32 start, int32 end, int quality);
+//int32 dumb_resample_1_1(DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume, double delta);
+int32 dumb_resample_1_2(DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta);
+//int32 dumb_resample_2_1(DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta);
+int32 dumb_resample_2_2(DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta);
+//void dumb_resample_get_current_sample_1_1(DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume, sample_t *dst);
+void dumb_resample_get_current_sample_1_2(DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst);
+//void dumb_resample_get_current_sample_2_1(DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst);
+void dumb_resample_get_current_sample_2_2(DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst);
+void dumb_end_resampler(DUMB_RESAMPLER *resampler);
+
+void dumb_reset_resampler_16(DUMB_RESAMPLER *resampler, short *src, int src_channels, int32 pos, int32 start, int32 end, int quality);
+DUMB_RESAMPLER *dumb_start_resampler_16(short *src, int src_channels, int32 pos, int32 start, int32 end, int quality);
+//int32 dumb_resample_16_1_1(DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume, double delta);
+int32 dumb_resample_16_1_2(DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta);
+//int32 dumb_resample_16_2_1(DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta);
+int32 dumb_resample_16_2_2(DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta);
+//void dumb_resample_get_current_sample_16_1_1(DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume, sample_t *dst);
+void dumb_resample_get_current_sample_16_1_2(DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst);
+//void dumb_resample_get_current_sample_16_2_1(DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst);
+void dumb_resample_get_current_sample_16_2_2(DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst);
+void dumb_end_resampler_16(DUMB_RESAMPLER *resampler);
+
+void dumb_reset_resampler_8(DUMB_RESAMPLER *resampler, signed char *src, int src_channels, int32 pos, int32 start, int32 end, int quality);
+DUMB_RESAMPLER *dumb_start_resampler_8(signed char *src, int src_channels, int32 pos, int32 start, int32 end, int quality);
+//int32 dumb_resample_8_1_1(DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume, double delta);
+int32 dumb_resample_8_1_2(DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta);
+//int32 dumb_resample_8_2_1(DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta);
+int32 dumb_resample_8_2_2(DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta);
+//void dumb_resample_get_current_sample_8_1_1(DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume, sample_t *dst);
+void dumb_resample_get_current_sample_8_1_2(DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst);
+//void dumb_resample_get_current_sample_8_2_1(DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst);
+void dumb_resample_get_current_sample_8_2_2(DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst);
+void dumb_end_resampler_8(DUMB_RESAMPLER *resampler);
+
+void dumb_reset_resampler_n(int n, DUMB_RESAMPLER *resampler, void *src, int src_channels, int32 pos, int32 start, int32 end, int quality);
+DUMB_RESAMPLER *dumb_start_resampler_n(int n, void *src, int src_channels, int32 pos, int32 start, int32 end, int quality);
+//int32 dumb_resample_n_1_1(int n, DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume, double delta);
+int32 dumb_resample_n_1_2(int n, DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta);
+//int32 dumb_resample_n_2_1(int n, DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta);
+int32 dumb_resample_n_2_2(int n, DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta);
+//void dumb_resample_get_current_sample_n_1_1(int n, DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume, sample_t *dst);
+void dumb_resample_get_current_sample_n_1_2(int n, DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst);
+//void dumb_resample_get_current_sample_n_2_1(int n, DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst);
+void dumb_resample_get_current_sample_n_2_2(int n, DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst);
+void dumb_end_resampler_n(int n, DUMB_RESAMPLER *resampler);
+
+/* This sets the default panning separation for hard panned formats,
+   or for formats with default panning information. This must be set
+   before using any readers or loaders, and is not really thread safe. */
+
+extern int dumb_it_default_panning_separation; /* in percent, default 25 */
+
+/* DUH Construction */
+
+DUH *make_duh(
+	int32 length,
+	int n_tags,
+	const char *const tag[][2],
+	int n_signals,
+	DUH_SIGTYPE_DESC *desc[],
+	sigdata_t *sigdata[]
+);
+
+void DUMBEXPORT duh_set_length(DUH *duh, int32 length);
+
+
+#ifdef __cplusplus
+	}
+#endif
+
+
+#endif /* DUMB_H */
diff --git a/libraries/dumb/include/internal/aldumb.h b/libraries/dumb/include/internal/aldumb.h
new file mode 100644
index 000000000..a0c6d63c0
--- /dev/null
+++ b/libraries/dumb/include/internal/aldumb.h
@@ -0,0 +1,27 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * internal/aldumb.h - The internal header file       / / \  \
+ *                     for DUMB with Allegro.        | <  /   \_
+ *                                                   |  \/ /\   /
+ *                                                    \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#ifndef INTERNAL_ALDUMB_H
+#define INTERNAL_ALDUMB_H
+
+
+void _dat_unload_duh(void *duh); /* NOTE(review): presumably the Allegro datafile unload hook for a DUH - confirm */
+
+
+#endif /* INTERNAL_ALDUMB_H */
diff --git a/libraries/dumb/include/internal/barray.h b/libraries/dumb/include/internal/barray.h
new file mode 100644
index 000000000..de9fab70c
--- /dev/null
+++ b/libraries/dumb/include/internal/barray.h
@@ -0,0 +1,41 @@
+#ifndef _B_ARRAY_H_
+#define _B_ARRAY_H_
+
+#include <stdlib.h>
+
+#ifdef BARRAY_DECORATE /* optional prefix: when defined, each bit_array_* name below is renamed to <prefix>_bit_array_* */
+#define PASTE(a,b) a ## b /* raw token paste of a and b */
+#define EVALUATE(a,b) PASTE(a,b) /* extra level so BARRAY_DECORATE is macro-expanded before pasting */
+#define bit_array_create EVALUATE(BARRAY_DECORATE,_bit_array_create)
+#define bit_array_destroy EVALUATE(BARRAY_DECORATE,_bit_array_destroy)
+#define bit_array_dup EVALUATE(BARRAY_DECORATE,_bit_array_dup)
+#define bit_array_reset EVALUATE(BARRAY_DECORATE,_bit_array_reset)
+#define bit_array_set EVALUATE(BARRAY_DECORATE,_bit_array_set)
+#define bit_array_set_range EVALUATE(BARRAY_DECORATE,_bit_array_set_range)
+#define bit_array_test EVALUATE(BARRAY_DECORATE,_bit_array_test)
+#define bit_array_test_range EVALUATE(BARRAY_DECORATE,_bit_array_test_range)
+#define bit_array_clear EVALUATE(BARRAY_DECORATE,_bit_array_clear)
+#define bit_array_clear_range EVALUATE(BARRAY_DECORATE,_bit_array_clear_range)
+#define bit_array_merge EVALUATE(BARRAY_DECORATE,_bit_array_merge)
+#define bit_array_mask EVALUATE(BARRAY_DECORATE,_bit_array_mask)
+#endif /* BARRAY_DECORATE */
+
+void * bit_array_create(size_t size); /* arrays are handled through an untyped void * throughout this API */
+void bit_array_destroy(void * array);
+void * bit_array_dup(void * array);
+
+void bit_array_reset(void * array);
+
+void bit_array_set(void * array, size_t bit);
+void bit_array_set_range(void * array, size_t bit, size_t count);
+
+int bit_array_test(void * array, size_t bit);
+int bit_array_test_range(void * array, size_t bit, size_t count);
+
+void bit_array_clear(void * array, size_t bit);
+void bit_array_clear_range(void * array, size_t bit, size_t count);
+
+void bit_array_merge(void * array, void * source, size_t offset);
+void bit_array_mask(void * array, void * source, size_t offset);
+
+#endif /* _B_ARRAY_H_ */
diff --git a/libraries/dumb/include/internal/dumb.h b/libraries/dumb/include/internal/dumb.h
new file mode 100644
index 000000000..bb2fe5c1c
--- /dev/null
+++ b/libraries/dumb/include/internal/dumb.h
@@ -0,0 +1,61 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * internal/dumb.h - DUMB's internal declarations.    / / \  \
+ *                                                   | <  /   \_
+ * This header file provides access to the           |  \/ /\   /
+ * internal structure of DUMB, and is liable          \_  /  > /
+ * to change, mutate or cease to exist at any           | \ / /
+ * moment. Include it at your own peril.                |  ' /
+ *                                                       \__/
+ * ...
+ *
+ * Seriously. You don't need access to anything in this file. All right, you
+ * probably do actually. But if you use it, you will be relying on a specific
+ * version of DUMB, so please check DUMB_VERSION defined in dumb.h. Please
+ * contact the authors so that we can provide a public API for what you need.
+ */
+
+#ifndef INTERNAL_DUMB_H
+#define INTERNAL_DUMB_H
+
+/* This header includes nothing itself: DUH_SIGTYPE_DESC, sigdata_t and int32 must already be declared by the includer. */
+typedef struct DUH_SIGTYPE_DESC_LINK
+{
+	struct DUH_SIGTYPE_DESC_LINK *next; /* following link in the chain */
+	DUH_SIGTYPE_DESC *desc;
+}
+DUH_SIGTYPE_DESC_LINK;
+
+
+typedef struct DUH_SIGNAL /* pairs a signal's data with its sigtype descriptor */
+{
+	sigdata_t *sigdata;
+	DUH_SIGTYPE_DESC *desc;
+}
+DUH_SIGNAL;
+
+
+struct DUH
+{
+	int32 length;
+
+	int n_tags;
+	char *(*tag)[2]; /* pointer to arrays of two char *; presumably n_tags pairs - TODO confirm */
+
+	int n_signals;
+	DUH_SIGNAL **signal; /* presumably n_signals entries - TODO confirm */
+};
+
+
+DUH_SIGTYPE_DESC *_dumb_get_sigtype_desc(int32 type); /* NOTE(review): presumably looks up the descriptor registered for `type` - confirm */
+
+
+#endif /* INTERNAL_DUMB_H */
diff --git a/libraries/dumb/include/internal/dumbfile.h b/libraries/dumb/include/internal/dumbfile.h
new file mode 100644
index 000000000..c83cc9a00
--- /dev/null
+++ b/libraries/dumb/include/internal/dumbfile.h
@@ -0,0 +1,13 @@
+#ifndef DUMBFILE_H
+#define DUMBFILE_H
+
+#include "../dumb.h"
+
+struct DUMBFILE
+{
+    const DUMBFILE_SYSTEM *dfs; /* NOTE(review): presumably the I/O callback table used with `file` - confirm */
+    void *file; /* untyped handle; its concrete type is not visible in this header */
+    long pos; /* NOTE(review): presumably the current position in the stream - confirm */
+};
+
+#endif // DUMBFILE_H
diff --git a/libraries/dumb/include/internal/it.h b/libraries/dumb/include/internal/it.h
new file mode 100644
index 000000000..b5806223b
--- /dev/null
+++ b/libraries/dumb/include/internal/it.h
@@ -0,0 +1,914 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * internal/it.h - Internal stuff for IT playback     / / \  \
+ *                 and MOD/XM/S3M conversion.        | <  /   \_
+ *                                                   |  \/ /\   /
+ * This header file provides access to the            \_  /  > /
+ * internal structure of DUMB, and is liable            | \ / /
+ * to change, mutate or cease to exist at any           |  ' /
+ * moment. Include it at your own peril.                 \__/
+ *
+ * ...
+ *
+ * Seriously. You don't need access to anything in this file. All right, you
+ * probably do actually. But if you use it, you will be relying on a specific
+ * version of DUMB, so please check DUMB_VERSION defined in dumb.h. Please
+ * contact the authors so that we can provide a public API for what you need.
+ */
+
+#ifndef INTERNAL_IT_H
+#define INTERNAL_IT_H
+
+
+#define BIT_ARRAY_BULLSHIT /* when defined, IT_CHANNEL gains the played_patjump and played_patjump_order fields */
+
+#include <stddef.h>
+
+#include "barray.h"
+
+
+/** TO DO: THINK ABOUT THE FOLLOWING:
+
+sigdata->flags & IT_COMPATIBLE_GXX
+
+                Bit 5: On = Link Effect G's memory with Effect E/F. Also
+                            Gxx with an instrument present will cause the
+                            envelopes to be retriggered. If you change a
+                            sample on a row with Gxx, it'll adjust the
+                            frequency of the current note according to:
+
+                              NewFrequency = OldFrequency * NewC5 / OldC5;
+*/
+
+
+
+/* These #defines are TEMPORARY. They are used to write alternative code to
+ * handle ambiguities in the format specification. The correct code in each
+ * case will be determined most likely by experimentation.
+ */
+//#define STEREO_SAMPLES_COUNT_AS_TWO
+#define INVALID_ORDERS_END_SONG
+#define SUSTAIN_LOOP_OVERRIDES_NORMAL_LOOP
+#define VOLUME_OUT_OF_RANGE_SETS_MAXIMUM
+
+
+
+#define SIGTYPE_IT DUMB_ID('I', 'T', ' ', ' ')
+
+#define IT_SIGNATURE            DUMB_ID('I', 'M', 'P', 'M')
+#define IT_INSTRUMENT_SIGNATURE DUMB_ID('I', 'M', 'P', 'I')
+#define IT_SAMPLE_SIGNATURE     DUMB_ID('I', 'M', 'P', 'S')
+
+// Extension chunk IDs; note the characters are listed in reverse of the macro names (MPTX, INSM)
+#define IT_MPTX_SIGNATURE       DUMB_ID('X', 'T', 'P', 'M')
+#define IT_INSM_SIGNATURE       DUMB_ID('M', 'S', 'N', 'I')
+
+
+/* This is divided by the tempo times 256 to get the interval between ticks.
+ */
+#define TICK_TIME_DIVIDEND (65536 * 5 * 128)
+
+
+
+/* I'm not going to try to explain this, because I didn't derive it very
+ * formally ;)
+ */
+/* #define AMIGA_DIVISOR ((float)(4.0 * 14317056.0)) */
+/* I believe the following one to be more accurate. */
+//#define AMIGA_DIVISOR ((float)(8.0 * 7159090.5))
+#define AMIGA_CLOCK 3546895
+#define AMIGA_DIVISOR ((float)(16.0 * AMIGA_CLOCK))
+
+
+
+typedef struct IT_MIDI IT_MIDI;
+typedef struct IT_FILTER_STATE IT_FILTER_STATE;
+typedef struct IT_ENVELOPE IT_ENVELOPE;
+typedef struct IT_INSTRUMENT IT_INSTRUMENT;
+typedef struct IT_SAMPLE IT_SAMPLE;
+typedef struct IT_ENTRY IT_ENTRY;
+typedef struct IT_PATTERN IT_PATTERN;
+typedef struct IT_PLAYING_ENVELOPE IT_PLAYING_ENVELOPE;
+typedef struct IT_PLAYING IT_PLAYING;
+typedef struct IT_CHANNEL IT_CHANNEL;
+typedef struct IT_CHECKPOINT IT_CHECKPOINT;
+typedef struct IT_CALLBACKS IT_CALLBACKS;
+
+
+
+struct IT_MIDI
+{
+	unsigned char SFmacro[16][16]; // read these from 0x120
+	unsigned char SFmacrolen[16];
+	unsigned short SFmacroz[16]; /* Bitfield; bit 0 set = z in first position */
+	unsigned char Zmacro[128][16]; // read these from 0x320
+	unsigned char Zmacrolen[128];
+};
+
+
+
+struct IT_FILTER_STATE
+{
+	sample_t currsample, prevsample;
+};
+
+
+
+#define IT_ENVELOPE_ON                1
+#define IT_ENVELOPE_LOOP_ON           2
+#define IT_ENVELOPE_SUSTAIN_LOOP      4
+#define IT_ENVELOPE_CARRY             8
+#define IT_ENVELOPE_PITCH_IS_FILTER 128
+
+struct IT_ENVELOPE
+{
+	unsigned char flags;
+	unsigned char n_nodes;
+	unsigned char loop_start;
+	unsigned char loop_end;
+	unsigned char sus_loop_start;
+	unsigned char sus_loop_end;
+	signed char node_y[25];
+	unsigned short node_t[25];
+};
+
+
+
+#define NNA_NOTE_CUT      0
+#define NNA_NOTE_CONTINUE 1
+#define NNA_NOTE_OFF      2
+#define NNA_NOTE_FADE     3
+
+#define DCT_OFF        0
+#define DCT_NOTE       1
+#define DCT_SAMPLE     2
+#define DCT_INSTRUMENT 3
+
+#define DCA_NOTE_CUT  0
+#define DCA_NOTE_OFF  1
+#define DCA_NOTE_FADE 2
+
+struct IT_INSTRUMENT
+{
+	unsigned char name[27];
+	unsigned char filename[14];
+
+	int fadeout;
+
+	IT_ENVELOPE volume_envelope;
+	IT_ENVELOPE pan_envelope;
+	IT_ENVELOPE pitch_envelope;
+
+	unsigned char new_note_action;
+	unsigned char dup_check_type;
+	unsigned char dup_check_action;
+	signed char pp_separation;
+	unsigned char pp_centre;
+	unsigned char global_volume;
+	unsigned char default_pan;
+	unsigned char random_volume;
+	unsigned char random_pan;
+
+	unsigned char filter_cutoff;
+	unsigned char filter_resonance;
+
+	unsigned char map_note[120];
+	unsigned short map_sample[120];
+
+	//int output;
+};
+
+
+
+#define IT_SAMPLE_EXISTS              1
+#define IT_SAMPLE_16BIT               2
+#define IT_SAMPLE_STEREO              4
+#define IT_SAMPLE_LOOP               16
+#define IT_SAMPLE_SUS_LOOP           32
+#define IT_SAMPLE_PINGPONG_LOOP      64
+#define IT_SAMPLE_PINGPONG_SUS_LOOP 128
+
+#define IT_VIBRATO_SINE      0
+#define IT_VIBRATO_SAWTOOTH  1
+#define IT_VIBRATO_SQUARE    2
+#define IT_VIBRATO_RANDOM    3
+#define IT_VIBRATO_XM_SQUARE 4
+#define IT_VIBRATO_RAMP_DOWN 5
+#define IT_VIBRATO_RAMP_UP   6
+
+struct IT_SAMPLE
+{
+	unsigned char name[35];
+	unsigned char filename[15];
+	unsigned char flags;
+	unsigned char global_volume;
+	unsigned char default_volume;
+	unsigned char default_pan;
+	/* default_pan:
+	 *   0-255 for XM
+	 *   ignored for MOD
+	 *   otherwise, 0-64, and add 128 to enable
+	 */
+
+	int32 length;
+	int32 loop_start;
+	int32 loop_end;
+	int32 C5_speed;
+	int32 sus_loop_start;
+	int32 sus_loop_end;
+
+	unsigned char vibrato_speed;
+	unsigned char vibrato_depth;
+	unsigned char vibrato_rate;
+	unsigned char vibrato_waveform;
+
+	signed short   finetune;
+
+	void *data;
+
+	int max_resampling_quality;
+};
+
+
+
+#define IT_ENTRY_NOTE       1
+#define IT_ENTRY_INSTRUMENT 2
+#define IT_ENTRY_VOLPAN     4
+#define IT_ENTRY_EFFECT     8
+
+#define IT_SET_END_ROW(entry) ((entry)->channel = 255)
+#define IT_IS_END_ROW(entry) ((entry)->channel >= DUMB_IT_N_CHANNELS)
+
+#define IT_NOTE_OFF 255
+#define IT_NOTE_CUT 254
+
+#define IT_ENVELOPE_SHIFT 8
+
+#define IT_SURROUND 100
+#define IT_IS_SURROUND(pan) ((pan) > 64)
+#define IT_IS_SURROUND_SHIFTED(pan) ((pan) > 64 << IT_ENVELOPE_SHIFT) /* << binds tighter than >, so this compares pan against (64 << IT_ENVELOPE_SHIFT) */
+
+#define IT_SET_SPEED              1
+#define IT_JUMP_TO_ORDER          2
+#define IT_BREAK_TO_ROW           3
+#define IT_VOLUME_SLIDE           4
+#define IT_PORTAMENTO_DOWN        5
+#define IT_PORTAMENTO_UP          6
+#define IT_TONE_PORTAMENTO        7
+#define IT_VIBRATO                8
+#define IT_TREMOR                 9
+#define IT_ARPEGGIO              10
+#define IT_VOLSLIDE_VIBRATO      11
+#define IT_VOLSLIDE_TONEPORTA    12
+#define IT_SET_CHANNEL_VOLUME    13
+#define IT_CHANNEL_VOLUME_SLIDE  14
+#define IT_SET_SAMPLE_OFFSET     15
+#define IT_PANNING_SLIDE         16
+#define IT_RETRIGGER_NOTE        17
+#define IT_TREMOLO               18
+#define IT_S                     19
+#define IT_SET_SONG_TEMPO        20
+#define IT_FINE_VIBRATO          21
+#define IT_SET_GLOBAL_VOLUME     22
+#define IT_GLOBAL_VOLUME_SLIDE   23
+#define IT_SET_PANNING           24
+#define IT_PANBRELLO             25
+#define IT_MIDI_MACRO            26 //see MIDI.TXT
+
+/* Some effects needed for XM compatibility */
+#define IT_XM_PORTAMENTO_DOWN       27
+#define IT_XM_PORTAMENTO_UP         28
+#define IT_XM_FINE_VOLSLIDE_DOWN    29
+#define IT_XM_FINE_VOLSLIDE_UP      30
+#define IT_XM_RETRIGGER_NOTE        31
+#define IT_XM_KEY_OFF               32
+#define IT_XM_SET_ENVELOPE_POSITION 33
+
+/* More effects needed for PTM compatibility */
+#define IT_PTM_NOTE_SLIDE_DOWN        34
+#define IT_PTM_NOTE_SLIDE_UP          35
+#define IT_PTM_NOTE_SLIDE_DOWN_RETRIG 36
+#define IT_PTM_NOTE_SLIDE_UP_RETRIG   37
+
+/* More effects needed for OKT compatibility */
+#define IT_OKT_NOTE_SLIDE_DOWN        38
+#define IT_OKT_NOTE_SLIDE_DOWN_ROW    39
+#define IT_OKT_NOTE_SLIDE_UP          40
+#define IT_OKT_NOTE_SLIDE_UP_ROW      41
+#define IT_OKT_ARPEGGIO_3             42
+#define IT_OKT_ARPEGGIO_4             43
+#define IT_OKT_ARPEGGIO_5             44
+#define IT_OKT_VOLUME_SLIDE_DOWN      45
+#define IT_OKT_VOLUME_SLIDE_UP        46
+
+#define IT_N_EFFECTS                  47
+
+/* These represent the top nibble of the command value. */
+#define IT_S_SET_FILTER              0 /* Greyed out in IT... */
+#define IT_S_SET_GLISSANDO_CONTROL   1 /* Greyed out in IT... */
+#define IT_S_FINETUNE                2 /* Greyed out in IT... */
+#define IT_S_SET_VIBRATO_WAVEFORM    3
+#define IT_S_SET_TREMOLO_WAVEFORM    4
+#define IT_S_SET_PANBRELLO_WAVEFORM  5
+#define IT_S_FINE_PATTERN_DELAY      6
+#define IT_S7                        7
+#define IT_S_SET_PAN                 8
+#define IT_S_SET_SURROUND_SOUND      9
+#define IT_S_SET_HIGH_OFFSET        10
+#define IT_S_PATTERN_LOOP           11
+#define IT_S_DELAYED_NOTE_CUT       12
+#define IT_S_NOTE_DELAY             13
+#define IT_S_PATTERN_DELAY          14
+#define IT_S_SET_MIDI_MACRO         15
+
+/*
+S0x Set filter
+S1x Set glissando control
+S2x Set finetune
+
+
+S3x Set vibrato waveform to type x
+S4x Set tremolo waveform to type x
+S5x Set panbrello waveform to type x
+  Waveforms for commands S3x, S4x and S5x:
+    0: Sine wave
+    1: Ramp down
+    2: Square wave
+    3: Random wave
+S6x Pattern delay for x ticks
+S70 Past note cut
+S71 Past note off
+S72 Past note fade
+S73 Set NNA to note cut
+S74 Set NNA to continue
+S75 Set NNA to note off
+S76 Set NNA to note fade
+S77 Turn off volume envelope
+S78 Turn on volume envelope
+S79 Turn off panning envelope
+S7A Turn on panning envelope
+S7B Turn off pitch envelope
+S7C Turn on pitch envelope
+S8x Set panning position
+S91 Set surround sound
+SAy Set high value of sample offset yxx00h
+SB0 Set loopback point
+SBx Loop x times to loopback point
+SCx Note cut after x ticks
+SDx Note delay for x ticks
+SEx Pattern delay for x rows
+SFx Set parameterised MIDI Macro
+*/
+
+struct IT_ENTRY
+{
+	unsigned char channel; /* End of row if channel >= DUMB_IT_N_CHANNELS */
+	unsigned char mask;
+	unsigned char note;
+	unsigned char instrument;
+	unsigned char volpan;
+	unsigned char effect;
+	unsigned char effectvalue;
+};
+
+
+
+struct IT_PATTERN
+{
+	int n_rows;
+	int n_entries;
+	IT_ENTRY *entry;
+};
+
+
+
+#define IT_STEREO            1
+#define IT_USE_INSTRUMENTS   4
+#define IT_LINEAR_SLIDES     8 /* If not set, use Amiga slides */
+#define IT_OLD_EFFECTS      16
+#define IT_COMPATIBLE_GXX   32
+
+/* Mask of bits 0-5 (the flags above); keeps IT_WAS_AN_XM and IT_WAS_A_MOD from being set accidentally */
+#define IT_REAL_FLAGS       63
+
+#define IT_WAS_AN_XM        64 /* Set for both XMs and MODs */
+#define IT_WAS_A_MOD       128
+
+#define IT_WAS_AN_S3M      256
+
+#define IT_WAS_A_PTM       512
+
+#define IT_WAS_A_669      1024
+
+#define IT_WAS_AN_OKT     2048
+
+#define IT_WAS_AN_STM     4096
+
+#define IT_WAS_PROCESSED  8192 /* Will be set the first time a sigdata passes through a sigrenderer */
+
+#define IT_ORDER_END  255
+#define IT_ORDER_SKIP 254
+
+struct DUMB_IT_SIGDATA
+{
+	unsigned char name[65];
+
+	unsigned char *song_message;
+
+	int n_orders;
+	int n_instruments;
+	int n_samples;
+	int n_patterns;
+	int n_pchannels;
+
+	int flags; /* combination of the IT_* / IT_WAS_* bit values defined above (presumably - confirm) */
+
+	int global_volume;
+	int mixing_volume;
+	int speed;
+	int tempo;
+	int pan_separation;
+
+	unsigned char channel_pan[DUMB_IT_N_CHANNELS];
+	unsigned char channel_volume[DUMB_IT_N_CHANNELS];
+
+	unsigned char *order;
+	unsigned char restart_position; /* for XM compatibility */
+
+	IT_INSTRUMENT *instrument;
+	IT_SAMPLE *sample;
+	IT_PATTERN *pattern;
+
+	IT_MIDI *midi;
+
+	IT_CHECKPOINT *checkpoint;
+};
+
+
+
+struct IT_PLAYING_ENVELOPE
+{
+	int next_node;
+	int tick;
+	int value;
+};
+
+
+
+#define IT_PLAYING_BACKGROUND 1
+#define IT_PLAYING_SUSTAINOFF 2
+#define IT_PLAYING_FADING     4
+#define IT_PLAYING_DEAD       8
+#define IT_PLAYING_REVERSE    16
+
+struct IT_PLAYING
+{
+	int flags;
+
+	int resampling_quality;
+
+	IT_CHANNEL *channel;
+	IT_SAMPLE *sample;
+	IT_INSTRUMENT *instrument;
+	IT_INSTRUMENT *env_instrument;
+
+	unsigned short sampnum;
+	unsigned char instnum;
+
+	unsigned char declick_stage;
+
+	float float_volume[2];
+	float ramp_volume[2];
+	float ramp_delta[2];
+
+	unsigned char channel_volume;
+
+	unsigned char volume;
+	unsigned short pan;
+
+	signed char volume_offset, panning_offset;
+
+	unsigned char note;
+
+	unsigned char enabled_envelopes;
+
+	unsigned char filter_cutoff;
+	unsigned char filter_resonance;
+
+	unsigned short true_filter_cutoff;   /* These incorporate the filter envelope, and will not */
+	unsigned char true_filter_resonance; /* be changed if they would be set to 127<<8 and 0.    */
+
+	unsigned char vibrato_speed;
+	unsigned char vibrato_depth;
+	unsigned char vibrato_n; /* May be specified twice: volpan & effect. */
+	unsigned char vibrato_time;
+	unsigned char vibrato_waveform;
+
+	unsigned char tremolo_speed;
+	unsigned char tremolo_depth;
+	unsigned char tremolo_time;
+	unsigned char tremolo_waveform;
+
+	unsigned char panbrello_speed;
+	unsigned char panbrello_depth;
+	unsigned char panbrello_time;
+	unsigned char panbrello_waveform;
+	signed char panbrello_random;
+
+	unsigned char sample_vibrato_time;
+	unsigned char sample_vibrato_waveform;
+	int sample_vibrato_depth; /* Starts at rate?0:depth, increases by rate */
+
+	int slide;
+	float delta;
+	int finetune;
+
+	IT_PLAYING_ENVELOPE volume_envelope;
+	IT_PLAYING_ENVELOPE pan_envelope;
+	IT_PLAYING_ENVELOPE pitch_envelope;
+
+	int fadeoutcount;
+
+	IT_FILTER_STATE filter_state[2]; /* Left and right */
+
+	DUMB_RESAMPLER resampler;
+
+	/* time_lost is used to emulate Impulse Tracker's sample looping
+	 * characteristics. When time_lost is added to pos, the result represents
+	 * the position in the theoretical version of the sample where all loops
+	 * have been expanded. If this is stored, the resampling helpers will
+	 * safely convert it for use with new loop boundaries. The situation is
+	 * slightly more complicated if dir == -1 when the change takes place; we
+	 * must reflect pos off the loop end point and set dir to 1 before
+	 * proceeding.
+	 */
+	int32 time_lost;
+
+	//int output;
+
+	IT_PLAYING *next;
+};
+
+
+
+#define IT_CHANNEL_MUTED 1
+
+#define IT_ENV_VOLUME  1
+#define IT_ENV_PANNING 2
+#define IT_ENV_PITCH   4
+
+struct IT_CHANNEL /* Per-channel state; DUMB_IT_SIGRENDERER embeds an array of these */
+{
+	int flags; /* presumably holds IT_CHANNEL_MUTED (defined above) - confirm */
+
+	unsigned char volume;
+	signed char volslide;
+	signed char xm_volslide;
+	signed char panslide;
+
+	/* xm_volslide is used for volume slides done in the volume column in an
+	 * XM file, since it seems the volume column slide is applied first,
+	 * followed by clamping, followed by the effects column slide. IT does
+	 * not exhibit this behaviour, so xm_volslide is maintained at zero.
+	 */
+
+	unsigned char pan;
+	unsigned short truepan;
+
+	unsigned char channelvolume;
+	signed char channelvolslide;
+
+	unsigned char instrument;
+	unsigned char note;
+
+	unsigned char SFmacro;
+
+	unsigned char filter_cutoff;
+	unsigned char filter_resonance;
+
+	unsigned char key_off_count;
+	unsigned char note_cut_count;
+	unsigned char note_delay_count;
+	IT_ENTRY *note_delay_entry; /* presumably the entry held back while note_delay_count runs - confirm */
+
+	unsigned char new_note_action;
+
+	unsigned char const* arpeggio_table; /* Read-only table; contents are defined elsewhere */
+	signed char arpeggio_offsets[3];
+
+	int arpeggio_shift;
+	unsigned char retrig;
+	unsigned char xm_retrig;
+	int retrig_tick;
+
+	unsigned char tremor;
+	unsigned char tremor_time; /* Bit 6 set if note on; bit 7 set if tremor active. */
+
+	unsigned char vibrato_waveform;
+	unsigned char tremolo_waveform;
+	unsigned char panbrello_waveform;
+
+	int portamento;
+	int toneporta;
+	int toneslide;
+	unsigned char toneslide_tick, last_toneslide_tick, ptm_toneslide, ptm_last_toneslide, okt_toneslide;
+	unsigned char destnote;
+	unsigned char toneslide_retrig;
+
+	unsigned char glissando;
+
+	/** WARNING - for neatness, should one or both of these be in the IT_PLAYING struct? */
+	unsigned short sample;
+	unsigned char truenote;
+
+	unsigned char midi_state;
+
+	signed char lastvolslide;
+	unsigned char lastDKL;
+	unsigned char lastEF; /* Doubles as last portamento up for XM files */
+	unsigned char lastG;
+	unsigned char lastHspeed;
+	unsigned char lastHdepth;
+	unsigned char lastRspeed;
+	unsigned char lastRdepth;
+	unsigned char lastYspeed;
+	unsigned char lastYdepth;
+	unsigned char lastI;
+	unsigned char lastJ; /* Doubles as last portamento down for XM files */
+	unsigned char lastN;
+	unsigned char lastO;
+	unsigned char high_offset;
+	unsigned char lastP;
+	unsigned char lastQ;
+	unsigned char lastS;
+	unsigned char pat_loop_row;
+	unsigned char pat_loop_count;
+	unsigned char pat_loop_end_row; /* Used to catch infinite pattern loops */
+	unsigned char lastW;
+
+	unsigned char xm_lastE1;
+	unsigned char xm_lastE2;
+	unsigned char xm_lastEA;
+	unsigned char xm_lastEB;
+	unsigned char xm_lastX1;
+	unsigned char xm_lastX2;
+
+	unsigned char inv_loop_delay;
+	unsigned char inv_loop_speed;
+	int inv_loop_offset;
+
+	IT_PLAYING *playing; /* presumably the voice currently driven by this channel - confirm */
+
+#ifdef BIT_ARRAY_BULLSHIT
+	void * played_patjump; /* NOTE(review): presumably a bit array like DUMB_IT_SIGRENDERER::played - confirm */
+	int played_patjump_order;
+#endif
+
+	//int output;
+};
+
+
+
+struct DUMB_IT_SIGRENDERER /* Playback state for one DUMB_IT_SIGDATA */
+{
+	DUMB_IT_SIGDATA *sigdata;
+
+	int n_channels;
+
+	int resampling_quality;
+
+	unsigned char globalvolume;
+	signed char globalvolslide;
+
+	int tempo;
+	signed char temposlide;
+
+	IT_CHANNEL channel[DUMB_IT_N_CHANNELS]; /* Channel states are stored inline, not by pointer */
+
+	IT_PLAYING *playing[DUMB_IT_N_NNA_CHANNELS]; /* presumably voices kept alive by New Note Actions - confirm */
+
+	int tick;
+	int speed;
+	int rowcount;
+
+	int order; /* Set to -1 if the song is terminated by a callback. */
+	int row;
+	int processorder;
+	int processrow;
+	int breakrow;
+
+	int restart_position;
+
+	int n_rows;
+
+	IT_ENTRY *entry_start; /* NOTE(review): entry_start/entry/entry_end look like begin/cursor/end over pattern entries - confirm */
+	IT_ENTRY *entry;
+	IT_ENTRY *entry_end;
+
+	int32 time_left; /* Time before the next tick is processed */
+	int sub_time_left; /* presumably the fractional part of time_left - confirm */
+
+	DUMB_CLICK_REMOVER **click_remover;
+
+	IT_CALLBACKS *callbacks;
+
+#ifdef BIT_ARRAY_BULLSHIT
+	/* bit array, which rows are played, only checked by pattern break or loop commands */
+	void * played;
+#endif
+
+	int32 gvz_time; /* presumably "global volume zero" timing, cf. IT_CALLBACKS::global_volume_zero - confirm */
+	int gvz_sub_time;
+
+    int ramp_style;
+    
+	//int max_output;
+
+	IT_PLAYING *free_playing; /* presumably a free list chained through IT_PLAYING::next - confirm */
+};
+
+
+
+struct IT_CHECKPOINT /* Node of a singly linked list pairing a time with a renderer */
+{
+	IT_CHECKPOINT *next; /* Next node in the list */
+	int32 time; /* NOTE(review): units are not visible in this header - confirm where checkpoints are created */
+	DUMB_IT_SIGRENDERER *sigrenderer; /* presumably a snapshot of renderer state at 'time' - confirm */
+};
+
+
+
+struct IT_CALLBACKS /* Each callback is followed by its user-data pointer and then by its description */
+{
+	int (DUMBCALLBACK *loop)(void *data);
+	void *loop_data; /* presumably handed to loop() as 'data' - confirm */
+	/* loop(): Return 1 to prevent looping; the music will terminate abruptly. If you
+	 * want to make the music stop but allow samples to fade (beware, as they
+	 * might not fade at all!), use dumb_it_sr_set_speed() and set the speed
+	 * to 0. Note that xm_speed_zero() will not be called if you set the
+	 * speed manually, and also that this will work for IT and S3M files even
+	 * though the music can't stop in this way by itself.
+	 */
+
+	int (DUMBCALLBACK *xm_speed_zero)(void *data);
+	void *xm_speed_zero_data; /* presumably handed to xm_speed_zero() as 'data' - confirm */
+	/* xm_speed_zero(): Return 1 to terminate the mod, without letting samples fade. */
+
+	int (DUMBCALLBACK *midi)(void *data, int channel, unsigned char byte);
+	void *midi_data; /* presumably handed to midi() as 'data' - confirm */
+	/* midi(): Return 1 to prevent DUMB from subsequently interpreting the MIDI bytes
+	 * itself. In other words, return 1 if the Zxx macros in an IT file are
+	 * controlling filters and shouldn't be.
+	 */
+
+	int (DUMBCALLBACK *global_volume_zero)(void *data);
+	void *global_volume_zero_data; /* presumably handed to global_volume_zero() as 'data' - confirm */
+	/* global_volume_zero(): Return 1 to terminate the module when global volume is set to zero. */
+};
+
+
+
+void _dumb_it_end_sigrenderer(sigrenderer_t *sigrenderer);
+void _dumb_it_unload_sigdata(sigdata_t *vsigdata);
+
+extern DUH_SIGTYPE_DESC _dumb_sigtype_it;
+
+
+
+#define XM_APPREGIO                0 /* sic ("arpeggio"); spelling kept because the identifier may be referenced elsewhere */
+#define XM_PORTAMENTO_UP           1
+#define XM_PORTAMENTO_DOWN         2
+#define XM_TONE_PORTAMENTO         3
+#define XM_VIBRATO                 4
+#define XM_VOLSLIDE_TONEPORTA      5
+#define XM_VOLSLIDE_VIBRATO        6
+#define XM_TREMOLO                 7
+#define XM_SET_PANNING             8
+#define XM_SAMPLE_OFFSET           9
+#define XM_VOLUME_SLIDE            10 /* A */
+#define XM_POSITION_JUMP           11 /* B */
+#define XM_SET_CHANNEL_VOLUME      12 /* C */
+#define XM_PATTERN_BREAK           13 /* D */
+#define XM_E                       14 /* E */
+#define XM_SET_TEMPO_BPM           15 /* F */
+#define XM_SET_GLOBAL_VOLUME       16 /* G */
+#define XM_GLOBAL_VOLUME_SLIDE     17 /* H */
+#define XM_KEY_OFF                 20 /* K (undocumented) */
+#define XM_SET_ENVELOPE_POSITION   21 /* L */
+#define XM_PANNING_SLIDE           25 /* P */
+#define XM_MULTI_RETRIG            27 /* R */
+#define XM_TREMOR                  29 /* T */
+#define XM_X                       33 /* X */
+#define XM_N_EFFECTS               (10+26) /* Effects 0-9 plus one slot per letter A-Z (A is 10, X is 33) */
+
+#define XM_E_SET_FILTER            0x0
+#define XM_E_FINE_PORTA_UP         0x1
+#define XM_E_FINE_PORTA_DOWN       0x2
+#define XM_E_SET_GLISSANDO_CONTROL 0x3
+#define XM_E_SET_VIBRATO_CONTROL   0x4
+#define XM_E_SET_FINETUNE          0x5
+#define XM_E_SET_LOOP              0x6
+#define XM_E_SET_TREMOLO_CONTROL   0x7
+#define XM_E_SET_PANNING           0x8
+#define XM_E_RETRIG_NOTE           0x9
+#define XM_E_FINE_VOLSLIDE_UP      0xA
+#define XM_E_FINE_VOLSLIDE_DOWN    0xB
+#define XM_E_NOTE_CUT              0xC
+#define XM_E_NOTE_DELAY            0xD
+#define XM_E_PATTERN_DELAY         0xE
+#define XM_E_SET_MIDI_MACRO        0xF
+
+#define XM_X_EXTRAFINE_PORTA_UP    1
+#define XM_X_EXTRAFINE_PORTA_DOWN  2
+
+/* To make my life a bit simpler during conversion, effect E:xy is converted
+ * to effect number EBASE+x:y. The same applies to effect X, and IT's S. That
+ * way, these effects can be manipulated like regular effects.
+ */
+#define EBASE              (XM_N_EFFECTS)
+#define XBASE              (EBASE+16)
+#define SBASE              (IT_N_EFFECTS)
+
+#define EFFECT_VALUE(x, y) (((x)<<4)|(y)) /* Pack x above the low nibble and OR in y (y is not masked) */
+#define HIGH(v)            ((v)>>4) /* Everything above the low nibble (not masked to 4 bits) */
+#define LOW(v)             ((v)&0x0F) /* Low nibble */
+#define SET_HIGH(v, x)     ((v) = (((x)<<4)|((v)&0x0F))) /* Replace all but the low nibble of lvalue v; v is evaluated twice */
+#define SET_LOW(v, y)      ((v) = (((v)&0xF0)|(y))) /* Replace the low nibble of lvalue v, keeping bits 4-7; v is evaluated twice */
+#define BCD_TO_NORMAL(v)   (HIGH(v)*10+LOW(v)) /* Two-digit BCD to binary, e.g. 0x42 -> 42 */
+
+
+
+#if 0
+unsigned char **_dumb_malloc2(int w, int h);
+void _dumb_free2(unsigned char **line);
+#endif
+
+void _dumb_it_xm_convert_effect(int effect, int value, IT_ENTRY *entry, int mod);
+int _dumb_it_fix_invalid_orders(DUMB_IT_SIGDATA *sigdata);
+
+
+#define PTM_APPREGIO                0 /* sic ("arpeggio"); spelling kept because the identifier may be referenced elsewhere */
+#define PTM_PORTAMENTO_UP           1
+#define PTM_PORTAMENTO_DOWN         2
+#define PTM_TONE_PORTAMENTO         3
+#define PTM_VIBRATO                 4
+#define PTM_VOLSLIDE_TONEPORTA      5
+#define PTM_VOLSLIDE_VIBRATO        6
+#define PTM_TREMOLO                 7
+#define PTM_SAMPLE_OFFSET           9
+#define PTM_VOLUME_SLIDE            10 /* A */
+#define PTM_POSITION_JUMP           11 /* B */
+#define PTM_SET_CHANNEL_VOLUME      12 /* C */
+#define PTM_PATTERN_BREAK           13 /* D */
+#define PTM_E                       14 /* E */
+#define PTM_SET_TEMPO_BPM           15 /* F */
+#define PTM_SET_GLOBAL_VOLUME       16 /* G */
+#define PTM_RETRIGGER               17 /* H */
+#define PTM_FINE_VIBRATO            18 /* I */
+#define PTM_NOTE_SLIDE_UP           19 /* J */
+#define PTM_NOTE_SLIDE_DOWN         20 /* K */
+#define PTM_NOTE_SLIDE_UP_RETRIG    21 /* L */
+#define PTM_NOTE_SLIDE_DOWN_RETRIG  22 /* M */
+#define PTM_N_EFFECTS               23 /* One past the highest effect number above */
+
+#define PTM_E_FINE_PORTA_DOWN       0x1 /* NOTE: 0x1 is DOWN here, whereas XM_E_FINE_PORTA_UP is 0x1 */
+#define PTM_E_FINE_PORTA_UP         0x2 /* NOTE: 0x2 is UP here, whereas XM_E_FINE_PORTA_DOWN is 0x2 */
+#define PTM_E_SET_VIBRATO_CONTROL   0x4
+#define PTM_E_SET_FINETUNE          0x5
+#define PTM_E_SET_LOOP              0x6
+#define PTM_E_SET_TREMOLO_CONTROL   0x7
+#define PTM_E_SET_PANNING           0x8
+#define PTM_E_RETRIG_NOTE           0x9
+#define PTM_E_FINE_VOLSLIDE_UP      0xA
+#define PTM_E_FINE_VOLSLIDE_DOWN    0xB
+#define PTM_E_NOTE_CUT              0xC
+#define PTM_E_NOTE_DELAY            0xD
+#define PTM_E_PATTERN_DELAY         0xE
+
+/* As with the XM effects, effect E:xy is presumably converted to effect number
+ * PTM_EBASE+x:y so that it can be manipulated like a regular effect. (This
+ * comment was copied from the XM section; no X or S base is defined for PTM.)
+ */
+#define PTM_EBASE              (PTM_N_EFFECTS)
+
+void _dumb_it_ptm_convert_effect(int effect, int value, IT_ENTRY *entry);
+
+int32 _dumb_it_read_sample_data_adpcm4(IT_SAMPLE *sample, DUMBFILE *f);
+
+void _dumb_it_interleave_stereo_sample(IT_SAMPLE *sample);
+
+/* Calling either of these is optional; both take no arguments (declared as full prototypes). */
+void _dumb_init_cubic(void);
+#ifdef _USE_SSE
+void _dumb_init_sse(void);
+#endif
+
+#endif /* INTERNAL_IT_H */
diff --git a/libraries/dumb/include/internal/lpc.h b/libraries/dumb/include/internal/lpc.h
new file mode 100644
index 000000000..47fb03334
--- /dev/null
+++ b/libraries/dumb/include/internal/lpc.h
@@ -0,0 +1,30 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007             *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function: LPC low level routines
+  last mod: $Id: lpc.h 16037 2009-05-26 21:10:58Z xiphmont $
+
+ ********************************************************************/
+
+#ifndef _V_LPC_H_
+#define _V_LPC_H_
+
+/* simple linear scale LPC code (function names and file header indicate it originates from the OggVorbis codec sources) */
+extern float vorbis_lpc_from_data(float *data,float *lpc,int n,int m);
+
+extern void vorbis_lpc_predict(float *coeff,float *prime,int m,
+                               float *data,long n);
+
+struct DUMB_IT_SIGDATA; /* Forward declaration: only a pointer to it is needed below */
+extern void dumb_it_add_lpc(struct DUMB_IT_SIGDATA *sigdata);
+
+#endif
diff --git a/libraries/dumb/include/internal/mulsc.h b/libraries/dumb/include/internal/mulsc.h
new file mode 100644
index 000000000..57d6ec291
--- /dev/null
+++ b/libraries/dumb/include/internal/mulsc.h
@@ -0,0 +1,36 @@
+#ifndef INTERNAL_MULSC_H
+#define INTERNAL_MULSC_H
+
+#if !defined(_MSC_VER) || !defined(_M_IX86) || _MSC_VER >= 1800
+//#define MULSC(a, b) ((int)((LONG_LONG)(a) * (b) >> 16))
+//#define MULSC(a, b) ((a) * ((b) >> 2) >> 14)
+#define MULSCV(a, b) ((int)((LONG_LONG)(a) * (b) >> 32)) /* (a*b) >> 32 computed in LONG_LONG, i.e. the upper half of the product if LONG_LONG is 64-bit */
+#define MULSCA(a, b) ((int)((LONG_LONG)((a) << 4) * (b) >> 32)) /* MULSCV with a shifted left by 4 first; the shift happens in a's own type, before widening */
+#define MULSC(a, b) ((int)((LONG_LONG)((a) << 4) * ((b) << 12) >> 32)) /* Net effect (a*b) >> 16, provided the pre-shifts do not overflow */
+#define MULSC16(a, b) ((int)((LONG_LONG)((a) << 12) * ((b) << 12) >> 32)) /* Net effect (a*b) >> 8, provided the pre-shifts do not overflow */
+#else
+/* VC++ calls __allmul and __allshr for the above math. I don't know why.
+ * [Need to check if this still applies to recent versions of the compiler.] */
+static __forceinline unsigned long long MULLL(int a, int b) /* No C return statement: one-operand imul leaves the 64-bit product in edx:eax */
+{
+	__asm mov eax,a
+	__asm imul b
+}
+static __forceinline int MULSCV (int a, int b) /* Same result as the macro MULSCV above: upper 32 bits of a*b */
+{
+#ifndef _DEBUG
+	union { unsigned long long q; struct { int l, h; }; } val; /* l/h overlay the two 32-bit halves of q; h is the upper half on little-endian x86 */
+	val.q = MULLL(a,b);
+	return val.h;
+#else
+	__asm mov eax,a
+	__asm imul b
+	__asm mov eax,edx
+#endif
+}
+#define MULSCA(a, b)  MULSCV((a) << 4, b)
+#define MULSC(a, b)   MULSCV((a) << 4, (b) << 12)
+#define MULSC16(a, b) MULSCV((a) << 12, (b) << 12)
+#endif
+
+#endif /* INTERNAL_MULSC_H */
\ No newline at end of file
diff --git a/libraries/dumb/include/internal/resampler.h b/libraries/dumb/include/internal/resampler.h
new file mode 100644
index 000000000..0050ebf1a
--- /dev/null
+++ b/libraries/dumb/include/internal/resampler.h
@@ -0,0 +1,58 @@
+#ifndef _RESAMPLER_H_
+#define _RESAMPLER_H_
+
+// When RESAMPLER_DECORATE is defined, each resampler_* name below is remapped to <RESAMPLER_DECORATE>_resampler_* (ugly, but it lets the symbols carry a prefix)
+#ifdef RESAMPLER_DECORATE
+#define PASTE(a,b) a ## b
+#define EVALUATE(a,b) PASTE(a,b) /* Extra level so RESAMPLER_DECORATE is macro-expanded before ## pastes it */
+#define resampler_init EVALUATE(RESAMPLER_DECORATE,_resampler_init)
+#define resampler_create EVALUATE(RESAMPLER_DECORATE,_resampler_create)
+#define resampler_delete EVALUATE(RESAMPLER_DECORATE,_resampler_delete)
+#define resampler_dup EVALUATE(RESAMPLER_DECORATE,_resampler_dup)
+#define resampler_dup_inplace EVALUATE(RESAMPLER_DECORATE,_resampler_dup_inplace)
+#define resampler_set_quality EVALUATE(RESAMPLER_DECORATE,_resampler_set_quality)
+#define resampler_get_free_count EVALUATE(RESAMPLER_DECORATE,_resampler_get_free_count)
+#define resampler_write_sample EVALUATE(RESAMPLER_DECORATE,_resampler_write_sample)
+#define resampler_write_sample_fixed EVALUATE(RESAMPLER_DECORATE,_resampler_write_sample_fixed)
+#define resampler_set_rate EVALUATE(RESAMPLER_DECORATE,_resampler_set_rate)
+#define resampler_ready EVALUATE(RESAMPLER_DECORATE,_resampler_ready)
+#define resampler_clear EVALUATE(RESAMPLER_DECORATE,_resampler_clear)
+#define resampler_get_sample_count EVALUATE(RESAMPLER_DECORATE,_resampler_get_sample_count)
+#define resampler_get_sample EVALUATE(RESAMPLER_DECORATE,_resampler_get_sample)
+#define resampler_get_sample_float EVALUATE(RESAMPLER_DECORATE,_resampler_get_sample_float)
+#define resampler_remove_sample EVALUATE(RESAMPLER_DECORATE,_resampler_remove_sample)
+#endif
+
+void resampler_init(void);
+
+void * resampler_create(void);
+void resampler_delete(void *);
+void * resampler_dup(const void *);
+void resampler_dup_inplace(void *, const void *);
+
+enum /* Quality levels accepted by resampler_set_quality(); MIN and MAX alias the first and last level */
+{
+    RESAMPLER_QUALITY_MIN = 0,
+    RESAMPLER_QUALITY_ZOH = 0, /* presumably zero-order hold - confirm */
+    RESAMPLER_QUALITY_BLEP = 1,
+    RESAMPLER_QUALITY_LINEAR = 2,
+    RESAMPLER_QUALITY_BLAM = 3,
+    RESAMPLER_QUALITY_CUBIC = 4,
+    RESAMPLER_QUALITY_SINC = 5,
+    RESAMPLER_QUALITY_MAX = 5
+};
+
+void resampler_set_quality(void *, int quality);
+
+int resampler_get_free_count(void *);
+void resampler_write_sample(void *, short sample);
+void resampler_write_sample_fixed(void *, int sample, unsigned char depth);
+void resampler_set_rate( void *, double new_factor );
+int resampler_ready(void *);
+void resampler_clear(void *);
+int resampler_get_sample_count(void *);
+int resampler_get_sample(void *);
+float resampler_get_sample_float(void *);
+void resampler_remove_sample(void *, int decay);
+
+#endif
diff --git a/libraries/dumb/include/internal/riff.h b/libraries/dumb/include/internal/riff.h
new file mode 100644
index 000000000..54c87c47c
--- /dev/null
+++ b/libraries/dumb/include/internal/riff.h
@@ -0,0 +1,24 @@
+#ifndef RIFF_H
+#define RIFF_H
+
+struct riff; /* Forward declaration so riff_chunk can point at a nested riff */
+
+struct riff_chunk
+{
+	unsigned type; /* presumably the chunk's four-character code - confirm */
+    int32 offset; /* presumably the position of the chunk data within the file - confirm */
+	unsigned size;
+    struct riff * nested; /* Nested RIFF structure; presumably NULL for a plain chunk - confirm */
+};
+
+struct riff
+{
+	unsigned type;
+	unsigned chunk_count; /* presumably the number of elements in 'chunks' - confirm */
+	struct riff_chunk * chunks;
+};
+
+struct riff * riff_parse( DUMBFILE * f, int32 offset, int32 size, unsigned proper ); /* NOTE(review): needs DUMBFILE and int32 declared by an earlier include; presumably freed with riff_free() */
+void riff_free( struct riff * );
+
+#endif
diff --git a/libraries/dumb/include/internal/stack_alloc.h b/libraries/dumb/include/internal/stack_alloc.h
new file mode 100644
index 000000000..4cab5b9c6
--- /dev/null
+++ b/libraries/dumb/include/internal/stack_alloc.h
@@ -0,0 +1,113 @@
+/* Copyright (C) 2002 Jean-Marc Valin */
+/**
+   @file stack_alloc.h
+   @brief Temporary memory allocation on stack
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   - Neither the name of the Xiph.org Foundation nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef STACK_ALLOC_H
+#define STACK_ALLOC_H
+
+#ifdef _WIN32
+# include <malloc.h>
+#else
+# ifdef HAVE_ALLOCA_H
+#  include <alloca.h>
+# else
+#  include <stdlib.h>
+# endif
+#endif
+
+/**
+ * @def ALIGN(stack, size)
+ *
+ * Aligns the stack to a 'size' boundary
+ *
+ * @param stack Stack
+ * @param size  New size boundary
+ */
+
+/**
+ * @def PUSH(stack, size, type)
+ *
+ * Allocates 'size' elements of type 'type' on the stack
+ *
+ * @param stack Stack
+ * @param size  Number of elements
+ * @param type  Type of element
+ */
+
+/**
+ * @def VARDECL(var)
+ *
+ * Declare variable on stack
+ *
+ * @param var Variable to declare
+ */
+
+/**
+ * @def ALLOC(var, size, type)
+ *
+ * Allocate 'size' elements of 'type' on stack
+ *
+ * @param var  Name of variable to allocate
+ * @param size Number of elements
+ * @param type Type of element
+ */
+
+#ifdef ENABLE_VALGRIND
+
+#include <valgrind/memcheck.h>
+
+#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) /* Advance stack to the next multiple of size; the mask trick assumes size is a power of two */
+
+#define PUSH(stack, size, type) (VALGRIND_MAKE_NOACCESS(stack, 1000),ALIGN((stack),sizeof(type)),VALGRIND_MAKE_WRITABLE(stack, ((size)*sizeof(type))),(stack)+=((size)*sizeof(type)),(type*)((stack)-((size)*sizeof(type)))) /* As the plain PUSH below, plus Valgrind access annotations */
+
+#else
+
+#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) /* Identical to the Valgrind variant; NOTE(review): pointer-to-long cast, only the low bits survive the mask */
+
+#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)),(stack)+=((size)*sizeof(type)),(type*)((stack)-((size)*sizeof(type)))) /* Align, bump stack by size*sizeof(type), yield a type* to the start of the reserved region */
+
+#endif
+
+#if defined(VAR_ARRAYS)
+#define VARDECL(var) /* Expands to nothing: ALLOC itself declares the array in this mode */
+#define ALLOC(var, size, type) type var[size] /* Declares var as an array (a VLA when size is not a constant) */
+#elif defined(USE_ALLOCA)
+#define VARDECL(var) var
+#define ALLOC(var, size, type) var = alloca(sizeof(type)*(size)) /* Assigns to var, which must already be declared */
+#else
+#define VARDECL(var) var
+#define ALLOC(var, size, type) var = PUSH(stack, size, type) /* Requires a pointer variable named 'stack' in scope; presumably a char * since it is bumped by byte counts - confirm */
+#endif
+
+
+#endif
diff --git a/libraries/dumb/licence.txt b/libraries/dumb/licence.txt
new file mode 100644
index 000000000..961fe4ef8
--- /dev/null
+++ b/libraries/dumb/licence.txt
@@ -0,0 +1,87 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * licence.txt - Conditions for use of DUMB.          / / \  \
+ *                                                   | <  /   \_
+ * If you do not agree to these terms, please        |  \/ /\   /
+ * do not use DUMB.                                   \_  /  > /
+ *                                                      | \ / /
+ * Information in [brackets] is provided to aid         |  ' /
+ * interpretation of the licence.                        \__/
+ */
+
+
+Dynamic Universal Music Bibliotheque, Version 0.9.3
+
+Copyright (C) 2001-2005 Ben Davis, Robert J Ohannessian and Julien Cugniere
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event shall the authors be held liable for any damages arising from the
+use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim
+   that you wrote the original software. If you use this software in a
+   product, you are requested to acknowledge its use in the product
+   documentation, along with details on where to get an unmodified version of
+   this software, but this is not a strict requirement.
+
+   [Note that the above point asks for a link to DUMB, not just a mention.
+   Googling for DUMB doesn't help much! The URL is "http://dumb.sf.net/".]
+
+   [The link was originally strictly required. This was changed for two
+   reasons. Firstly, if many projects request an acknowledgement, the list of
+   acknowledgements can become quite unmanageable. Secondly, DUMB was placing
+   a restriction on the code using it, preventing people from using the GNU
+   General Public Licence which disallows any such restrictions. See
+   http://www.gnu.org/philosophy/bsd.html for more information on this
+   subject. However, if DUMB plays a significant part in your project, we do
+   urge you to acknowledge its use.]
+
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+
+3. This notice may not be removed from or altered in any source distribution.
+
+4. If you are using the Program in someone else's bedroom on any Monday at
+   3:05 pm, you are not allowed to modify the Program for ten minutes. [This
+   clause provided by Inphernic; every licence should contain at least one
+   clause, the reasoning behind which is far from obvious.]
+
+5. Users who wish to use DUMB for the specific purpose of playing music are
+   required to feed their dog on every full moon (if deemed appropriate).
+   [This clause provided by Allefant, who couldn't remember what Inphernic's
+   clause was.]
+
+6. No clause in this licence shall prevent this software from being depended
+   upon by a product licensed under the GNU General Public Licence. If such a
+   clause is deemed to exist, Debian, then it shall be respected in spirit as
+   far as possible and all other clauses shall continue to apply in full
+   force.
+
+8. Take the number stated as introducing this clause. Multiply it by two,
+   then subtract four. Now insert a '+' between the two digits and evaluate
+   the resulting sum. Call the result 'x'. If you have not yet concluded that
+   every numbered clause in this licence whose ordinal number is strictly
+   greater than 'x' (with the exception of the present clause) is null and
+   void, Debian, then you are hereby informed that laughter is good for one's
+   health and you are warmly suggested to do it. By the way, Clauses 4, 5 and
+   6 are null and void. Incidentally, I like Kubuntu. The work you guys do is
+   awesome. (Lawyers, on the other hand ...)
+
+We regret that we cannot provide any warranty, not even the implied warranty
+of merchantability or fitness for a particular purpose.
+
+Some files generated or copied by automake, autoconf and friends are
+available in an extra download. These fall under separate licences but are
+all free to distribute. Please check their licences as necessary.
diff --git a/libraries/dumb/prj/.gitignore b/libraries/dumb/prj/.gitignore
new file mode 100644
index 000000000..36d588baa
--- /dev/null
+++ b/libraries/dumb/prj/.gitignore
@@ -0,0 +1,3 @@
+dumb-build-Desktop-Release
+dumb-build-Desktop-Debug
+*.user
diff --git a/libraries/dumb/prj/dumb/dumb.pro b/libraries/dumb/prj/dumb/dumb.pro
new file mode 100644
index 000000000..9244ce4bd
--- /dev/null
+++ b/libraries/dumb/prj/dumb/dumb.pro
@@ -0,0 +1,128 @@
+#-------------------------------------------------
+#
+# Project created by QtCreator 2012-12-22T16:33:53
+#
+#-------------------------------------------------
+
+QT       -= core gui
+
+TARGET = dumb
+TEMPLATE = lib
+CONFIG += staticlib
+
+DEFINES += _USE_SSE
+
+INCLUDEPATH += ../../include
+
+QMAKE_CFLAGS += -msse
+
+SOURCES += \
+    ../../src/core/unload.c \
+    ../../src/core/rendsig.c \
+    ../../src/core/rendduh.c \
+    ../../src/core/register.c \
+    ../../src/core/readduh.c \
+    ../../src/core/rawsig.c \
+    ../../src/core/makeduh.c \
+    ../../src/core/loadduh.c \
+    ../../src/core/dumbfile.c \
+    ../../src/core/duhtag.c \
+    ../../src/core/duhlen.c \
+    ../../src/core/atexit.c \
+    ../../src/helpers/stdfile.c \
+    ../../src/helpers/silence.c \
+    ../../src/helpers/sampbuf.c \
+    ../../src/helpers/riff.c \
+    ../../src/helpers/resample.c \
+    ../../src/helpers/memfile.c \
+    ../../src/helpers/clickrem.c \
+    ../../src/helpers/barray.c \
+    ../../src/it/xmeffect.c \
+    ../../src/it/readxm2.c \
+    ../../src/it/readxm.c \
+    ../../src/it/readstm2.c \
+    ../../src/it/readstm.c \
+    ../../src/it/reads3m2.c \
+    ../../src/it/reads3m.c \
+    ../../src/it/readriff.c \
+    ../../src/it/readptm.c \
+    ../../src/it/readpsm.c \
+    ../../src/it/readoldpsm.c \
+    ../../src/it/readokt2.c \
+    ../../src/it/readokt.c \
+    ../../src/it/readmtm.c \
+    ../../src/it/readmod2.c \
+    ../../src/it/readmod.c \
+    ../../src/it/readdsmf.c \
+    ../../src/it/readasy.c \
+    ../../src/it/readamf2.c \
+    ../../src/it/readamf.c \
+    ../../src/it/readam.c \
+    ../../src/it/read6692.c \
+    ../../src/it/read669.c \
+    ../../src/it/ptmeffect.c \
+    ../../src/it/loadxm2.c \
+    ../../src/it/loadxm.c \
+    ../../src/it/loadstm2.c \
+    ../../src/it/loadstm.c \
+    ../../src/it/loads3m2.c \
+    ../../src/it/loads3m.c \
+    ../../src/it/loadriff2.c \
+    ../../src/it/loadriff.c \
+    ../../src/it/loadptm2.c \
+    ../../src/it/loadptm.c \
+    ../../src/it/loadpsm2.c \
+    ../../src/it/loadpsm.c \
+    ../../src/it/loadoldpsm2.c \
+    ../../src/it/loadoldpsm.c \
+    ../../src/it/loadokt2.c \
+    ../../src/it/loadokt.c \
+    ../../src/it/loadmtm2.c \
+    ../../src/it/loadmtm.c \
+    ../../src/it/loadmod2.c \
+    ../../src/it/loadmod.c \
+    ../../src/it/loadasy2.c \
+    ../../src/it/loadasy.c \
+    ../../src/it/loadamf2.c \
+    ../../src/it/loadamf.c \
+    ../../src/it/load6692.c \
+    ../../src/it/load669.c \
+    ../../src/it/itunload.c \
+    ../../src/it/itrender.c \
+    ../../src/it/itread2.c \
+    ../../src/it/itread.c \
+    ../../src/it/itorder.c \
+    ../../src/it/itmisc.c \
+    ../../src/it/itload2.c \
+    ../../src/it/itload.c \
+    ../../src/it/readany.c \
+    ../../src/it/loadany2.c \
+    ../../src/it/loadany.c \
+    ../../src/it/readany2.c \
+    ../../src/helpers/sinc_resampler.c \
+    ../../src/helpers/lpc.c
+
+HEADERS += \
+    ../../include/dumb.h \
+    ../../include/internal/riff.h \
+    ../../include/internal/it.h \
+    ../../include/internal/dumb.h \
+    ../../include/internal/barray.h \
+    ../../include/internal/aldumb.h \
+    ../../include/internal/sinc_resampler.h \
+    ../../include/internal/stack_alloc.h \
+    ../../include/internal/lpc.h \
+    ../../include/internal/dumbfile.h
+unix:!symbian {
+    maemo5 {
+        target.path = /opt/usr/lib
+    } else {
+        target.path = /usr/lib
+    }
+    INSTALLS += target
+}
+
+OTHER_FILES += \
+    ../../src/helpers/resample.inc \
+    ../../src/helpers/resamp3.inc \
+    ../../src/helpers/resamp2.inc
diff --git a/libraries/dumb/readme.txt b/libraries/dumb/readme.txt
new file mode 100644
index 000000000..e86af048a
--- /dev/null
+++ b/libraries/dumb/readme.txt
@@ -0,0 +1,541 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readme.txt - General information on DUMB.          / / \  \
+ *                                                   | <  /   \_
+ *                                                   |  \/ /\   /
+ *                                                    \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+
+********************
+*** Introduction ***
+********************
+
+
+Thank you for downloading DUMB v0.9.3! You should have the following
+documentation:
+
+   readme.txt    - This file
+   licence.txt   - Conditions for the use of this software
+   release.txt   - Release notes and changes for this and past releases
+   docs/
+     howto.txt   - Step-by-step instructions on adding DUMB to your project
+     faq.txt     - Frequently asked questions and answers to them
+     dumb.txt    - DUMB library reference
+     deprec.txt  - Information about deprecated parts of the API
+     ptr.txt     - Quick introduction to pointers for those who need it
+     fnptr.txt   - Explanation of function pointers for those who need it
+     modplug.txt - Our official position regarding ModPlug Tracker
+
+This file will help you get DUMB set up. If you have not yet done so, please
+read licence.txt and release.txt before proceeding. After you've got DUMB set
+up, please refer to the files in the docs/ directory at your convenience. I
+recommend you start with howto.txt.
+
+
+****************
+*** Features ***
+****************
+
+
+Here is the statutory feature list:
+
+- Freeware
+
+- Supports playback of IT, XM, S3M and MOD files
+
+- Faithful to the original trackers, especially IT; if it plays your module
+  wrongly, please tell me so I can fix the bug! (But please don't complain
+  about differences between DUMB and ModPlug Tracker; see docs/modplug.txt)
+
+- Accurate support for low-pass resonant filters for IT files
+
+- Very accurate timing and pitching; completely deterministic playback
+
+- Click removal
+
+- Facility to embed music files in other files (e.g. Allegro datafiles)
+
+- Three resampling quality settings: aliasing, linear interpolation and cubic
+  interpolation
+
+- Number of samples playing at once can be limited to reduce processor usage,
+  but samples will come back in when other louder ones stop
+
+- All notes will be present and correct even if you start a piece of music in
+  the middle
+
+- Option to take longer loading but seek fast to any point before the music
+  first loops (seeking time increases beyond this point)
+
+- Audio generated can be used in any way; DUMB does not necessarily send it
+  straight to a sound output system
+
+- Can be used with Allegro, can be used without (if you'd like to help make
+  DUMB more approachable to people who aren't using Allegro, please contact
+  me)
+
+- Makefile provided for DJGPP, MinGW, Linux, BeOS and Mac OS X
+
+- Project files provided for MSVC 6
+
+- Autotools-based configure script available as a separate download for
+  masochists
+
+- Code should port anywhere that has a 32-bit C compiler; instructions on
+  compiling it manually are available further down
+
+
+*********************
+*** What you need ***
+*********************
+
+
+To use DUMB, you need a 32-bit C compiler (GCC and MSVC are fine). If you
+have Allegro, DUMB can integrate with its audio streams and datafiles, making
+your life easier. If you do not wish to use Allegro, you will have to do some
+work to get music playing back. The 'dumbplay' example program requires
+Allegro.
+
+   Allegro - http://alleg.sf.net/
+
+
+**********************************************
+*** How to set DUMB up with DJGPP or MinGW ***
+**********************************************
+
+
+You should have got the .zip version. If for some reason you got the .tar.gz
+version instead, you may have to convert make/config.bat to DOS text file
+format. WinZip does this automatically by default. Otherwise, loading it into
+MS EDIT and saving it again should do the trick (but do not do this to the
+Makefiles as it destroys tabs). You will have to do the same for any files
+you want to view in Windows Notepad. If you have problems, just go and
+download the .zip instead.
+
+Make sure you preserved the directory structure when you extracted DUMB from
+the archive. Most unzipping programs will do this by default, but pkunzip
+requires you to pass -d. If not, please delete DUMB and extract it again
+properly.
+
+If you are using Windows, open an MS-DOS Prompt or a Windows Command Line.
+Change to the directory into which you unzipped DUMB.
+
+If you are using MinGW (and you haven't renamed 'mingw32-make'), type:
+
+   mingw32-make
+
+Otherwise, type the following:
+
+   make
+
+DUMB will ask you whether you wish to compile for DJGPP or MinGW. Then it
+will ask you whether you want support for Allegro. (You have to have made and
+installed Allegro's optimised library for this to work.) Finally, it will
+compile optimised and debugging builds of DUMB, along with the example
+programs. When it has finished, run one of the following to install the
+libraries:
+
+   make install
+   mingw32-make install
+
+All done! If you ever need the configuration again (e.g. if you compiled for
+DJGPP before and you want to compile for MinGW now), run one of the
+following:
+
+   make config
+   mingw32-make config
+
+See the comments in the Makefile for other targets.
+
+Note: the Makefile will only work properly if you have COMSPEC or ComSpec set
+to point to command.com or cmd.exe. If you set it to point to a Unix-style
+shell, the Makefile won't work.
+
+Please let me know if you have any trouble.
+
+As an alternative, MSYS users may attempt to use the configure script,
+available in dumb-0.9.3-autotools.tar.gz. This has been found to work without
+Allegro, and is untested with Allegro. I should appreciate feedback from
+anyone else who tries this. I do not recommend its use, partly because it
+creates dynamically linked libraries and I don't know how to stop it from
+doing that (see the section on compiling DUMB manually), and partly because
+autotools are plain evil.
+
+Scroll down for information on the example programs. Refer to docs/howto.txt
+when you are ready to start programming with DUMB. If you use DUMB in a game,
+let me know - I might decide to place a link to your game on DUMB's website!
+
+
+******************************************************
+*** How to set DUMB up with Microsoft Visual C++ 6 ***
+******************************************************
+
+
+If you have a newer version of Microsoft Visual C++ or Visual Something that
+supports C++, please try these instructions and let me know if it works.
+
+You should have got the .zip version. If for some reason you got the .tar.gz
+version instead, you may have to convert some files to DOS text file format.
+WinZip does this automatically by default. Otherwise, loading such files into
+MS EDIT and saving them again should do the trick. You will have to do this
+for any files you want to view in Windows Notepad. If you have problems, just
+go and download the .zip instead.
+
+Make sure you preserved the directory structure when you extracted DUMB from
+the archive. Most unzipping programs will do this by default, but pkunzip
+requires you to pass -d. If not, please delete DUMB and extract it again
+properly.
+
+DUMB comes with a workspace for Microsoft Visual C++ 6, containing projects for
+the DUMB core, the Allegro interface library and each of the examples. The
+first thing you might want to do is load the workspace up and have a look
+around. You will find it in the dumb\vc6 directory under the name dumb.dsw.
+Note that the aldumb and dumbplay projects require Allegro, so they won't
+work if you don't have Allegro. Nevertheless, dumbplay is the best-commented
+of the examples, so do have a look.
+
+When you are ready to add DUMB to your project, follow these instructions:
+
+1. Open your project in VC++.
+2. Select Project|Insert Project into Workspace...
+3. Navigate to the dumb\vc6\dumb directory and select dumb.dsp.
+   Alternatively, if you know that you are statically linking with a library
+   that uses the statically linked multithreaded runtime (/MT), you may wish
+   to select dumb_static.dsp in the dumb_static subdirectory instead.
+4. Select Build|Set Active Configuration..., and reselect one of your
+   project's configurations.
+5. Select Project|Dependencies... and ensure your project is dependent on
+   DUMB.
+6. Select Project|Settings..., Settings for: All Configurations, C/C++ tab,
+   Preprocessor category. Add the DUMB include directory to the Additional
+   Include Directories box.
+7. Ensure that for all the projects in the workspace (or more likely just all
+   the projects in a particular dependency chain) the run-time libraries are
+   the same. That's in Project|Settings, C/C++ tab, Code generation category,
+   Use run-time library dropdown. The settings for Release and Debug are
+   separate, so you'll have to change them one at a time. Exactly which run-
+   time library you use will depend on what you need; it doesn't appear that
+   DUMB has any particular requirements, so set it to whatever you're using
+   now. (It will have to be /MD, the multithreaded DLL library, if you are
+   statically linking with Allegro. If you are dynamically linking with
+   Allegro then it doesn't matter.)
+8. If you are using Allegro, do some or all of the above for the aldumb.dsp
+   project in the aldumb directory too.
+
+Good thing you only have to do all that once ... or twice ...
+
+If you have the Intel compiler installed, it will - well, should - be used to
+compile DUMB. The only setting I [Tom Seddon] added is /QxiM. This allows the
+compiler to use PPro and MMX instructions, and so when compiling with Intel
+the resultant EXE will require a Pentium II or greater. I don't think this is
+unreasonable. After all, it is 2003 :)
+
+[Note from Ben: the Intel compiler is evil! It makes AMD processors look bad!
+Patch it or boycott it or something!]
+
+If you don't have the Intel compiler, VC will compile DUMB as normal.
+
+This project file and these instructions were provided by Tom Seddon (I hope
+I got his name right; I had to guess it from his e-mail address!). Chad
+Austin has since changed the project files around, and I've just attempted to
+hack them to incorporate new source files. I've also tried to update the
+instructions using guesswork and some knowledge of Visual J++ (you heard me).
+The instructions and the project files are to this day untested by me. If you
+have problems, check the download page at http://dumb.sf.net/ to see if they
+are addressed; failing that, direct queries to me and I'll try to figure them
+out.
+
+If you have any comments at all on how the VC6 projects are laid out, or how
+the instructions could be improved, I should be really grateful to hear them.
+I am a perfectionist, after all. :)
+
+Scroll down for information on the example programs. When you are ready to
+start using DUMB, refer to docs/howto.txt. If you use DUMB in a game, let me
+know - I might decide to place a link to your game on DUMB's website!
+
+
+******************************************************
+*** How to set DUMB up on Linux, BeOS and Mac OS X ***
+******************************************************
+
+
+You should have got the .tar.gz version. If for some reason you got the .zip
+version instead, you may have to strip all characters with ASCII code 13 from
+some of the text files. If you have problems, just go and download the
+.tar.gz instead.
+
+You have two options. There is a Makefile which should cope with most
+systems. The first option is to use this default Makefile, and the procedure
+is explained below. The second option is to download
+dumb-0.9.3-autotools.tar.gz, extract it over the installation, run
+./configure and use the generated Makefile. Users who choose to do this are
+left to their own devices but advised to read the information at the end of
+this section. I strongly recommend the first option.
+
+If you are not using the configure script, the procedure is as follows.
+
+First, run the following command as a normal user:
+
+   make
+
+You will be asked whether you want Allegro support. Then, unless you are on
+BeOS, you will be asked where you'd like DUMB to install its headers,
+libraries and examples (which will go in the include/, lib/ and bin/
+subdirectories of the prefix you specify). BeOS has fixed locations for these
+files. You may use shell variables here, e.g. $HOME or ${HOME}, but ~ will
+not work. Once you have specified these pieces of information, the optimised
+and debugging builds of DUMB will be compiled, along with the examples. When
+it has finished, you can install them with:
+
+   make install
+
+You may need to be root for this to work. It depends on the prefix you chose.
+
+Note: the Makefile will only work if COMSPEC and ComSpec are both undefined.
+If either of these is defined, the Makefile will try to build for a Windows
+system, and will fail.
+
+Please let me know if you have any trouble.
+
+Scroll down for information on the example programs. Refer to docs/howto.txt
+when you are ready to start programming with DUMB. If you use DUMB in a game,
+let me know - I might decide to place a link to your game on DUMB's website!
+
+Important information for users of the configure script follows.
+
+The Makefile generated by the configure script creates dynamically linked
+libraries, and I don't know how to stop it from doing so. See the section
+below on building DUMB manually for why I recommend linking DUMB statically.
+However, if you choose to use the configure script, note the following.
+
+The default Makefile is a copy of Makefile.rdy (short for 'ready'), and it
+must exist with the name Makefile.rdy in order to work. The configure script
+will overwrite Makefile, so if you want the default Makefile back, just run:
+
+   cp Makefile.rdy Makefile
+
+Do not use a symlink, as that would result in Makefile.rdy getting
+overwritten next time the configure script is run!
+
+You can also access the usual build system by passing '-f Makefile.rdy' to
+Make.
+
+
+********************************************************
+*** How to build DUMB manually if nothing else works ***
+********************************************************
+
+
+Those porting to platforms without floating point support should be aware
+that DUMB does use floating point operations but not in the inner loops. They
+are used for volume and note pitch calculations, and they are used when
+initialising the filter algorithm for given cut-off and resonance values.
+Please let me know if this is a problem for you. If there is enough demand, I
+may be able to eliminate one or both of these cases.
+
+All of the library source code may be found in the src/ subdirectory. There
+are headers in the include/ subdirectory, and src/helpers/resample.c also
+#includes some .inc files in its own directory.
+
+There are four subdirectories under src/. For projects not using Allegro, you
+will need all the files in src/core/, src/helpers/ and src/it/. If you are
+using Allegro, you will want the src/allegro/ subdirectory too. For
+consistency with the other build systems, the contents of src/allegro/ should
+be compiled into a separate library.
+
+I recommend static-linking DUMB, since the version information is done via
+macros and the API has a tendency to change. If you static-link, then once
+your program is in binary form, you can be sure that changes to the installed
+version of DUMB won't cause it to malfunction. It is my fault that the API has
+been so unstable. Sorry!
+
+Compile each .c file separately. As mentioned above, you will need to specify
+two places to look for #include files: the include/ directory and the source
+file's own directory. You will also need to define the symbol
+DUMB_DECLARE_DEPRECATED on the command line.
+
+Do not compile the .inc files separately.
+
+You may need to edit dumb.h and add your own definition for LONG_LONG. It
+should be a 64-bit integer. If you do this, please see if you can add a check
+for your compiler so that it still works with other compilers.
+
+DUMB has two build modes. If you define the symbol DEBUGMODE, some checks for
+programmer error will be incorporated into the library. Otherwise it will be
+built without any such checks. (DUMB will however always thoroughly check the
+validity of files it is loading. If you ever find a module file that crashes
+DUMB, please let me know!)
+
+I recommend building two versions of the library, one with DEBUGMODE defined
+and debugging information included, and the other with compiler optimisation
+enabled. If you can install DUMB system-wide so that your projects, and other
+people's, can simply #include <dumb.h> or <aldumb.h> and link with libraries
+by simple name with no path, then that is ideal.
+
+If you successfully port DUMB to a new platform, please let me know!
+
+
+****************************
+*** The example programs ***
+****************************
+
+
+Three example programs are provided. On DOS and Windows, you can find them in
+the examples subdirectory. On other systems they will be installed system-
+wide.
+
+dumbplay
+   This program will only be built if you have Allegro. Pass it the filename
+   of an IT, XM, S3M or MOD file, and it will play it. It's not a polished
+   player with real-time threading or anything - so don't complain about it
+   stuttering while you use other programs - but it does show DUMB's fidelity
+   nicely. You can control the playback quality by editing dumb.ini, which
+   must be in the current working directory. (This is a flaw for systems
+   where the program is installed system-wide, but it is non-fatal.) Have a
+   look at the examples/dumb.ini file for further information.
+
+dumbout
+   This program does not need Allegro. You can use it to stream an IT, XM,
+   S3M or MOD file to raw PCM. This can be used as input to an encoder like
+   oggenc (with appropriate command-line options), or it can be sent to a
+   .pcm file which can be read by any respectable waveform editor. This
+   program is also convenient for timing DUMB. Compare the time it takes to
+   render a module with the module's playing time! dumbout doesn't try to
+   read any configuration file; the options are set on the command line.
+
+dumb2wav
+   This program is much the same as dumbout, but it writes a .wav file with
+   the appropriate header. Thanks go to Chad Austin for this useful tool.
+
+
+*********************************************
+*** Downloading music or writing your own ***
+*********************************************
+
+
+If you would like to compose your own music modules, then this section should
+help get you started.
+
+The best programs for the job are the trackers that pioneered the file
+formats:
+
+   Impulse Tracker - IT files - http://www.lim.com.au/ImpulseTracker/
+   Fast Tracker II - XM files - http://www.fasttracker2.com/
+   Scream Tracker 3 - S3M files - No official site known, please use Google
+
+MOD files come from the Amiga; I do not know what PC tracker to recommend for
+editing these. If you know of one, let me know! In the meantime, I would
+recommend using a more advanced file format. However, don't convert your
+existing MODs just for the sake of it.
+
+Fast Tracker II is Shareware. It offers a very flashy interface and has a
+game embedded, but the IT file format is more powerful and better defined. By
+all means try them both and see which you prefer; it is largely a matter of
+taste (and, in some cases, religion). Impulse Tracker and Scream Tracker 3
+are Freeware, although you can donate to Impulse Tracker and receive a
+slightly upgraded version. DUMB is likely to be at its best with IT files.
+
+These editors are DOS programs. Users of DOS-incapable operating systems may
+like to try ModPlug Tracker, but should read docs/modplug.txt before using it
+for any serious work. If you use a different operating system, or if you know
+of any module editors for Windows that are more faithful to the original
+trackers' playback, please give me some links so I can put them here!
+
+   ModPlug Tracker - http://www.modplug.com/
+
+If you have an x86 Linux system with VGA-compatible hardware (which covers
+all PC graphics cards I've ever seen), you should be able to get Impulse
+Tracker running with DOSEMU. You will have to give it access to the VGA ports
+and run it in a true console, as it will not work with the X-based VGA
+emulation. I personally added the SB16 emulation to DOSEMU, so you can even
+use filters! However, it corrupts samples alarmingly often when saving on my
+system - probably a DOSEMU issue. If you set this up, I am curious to know
+whether it works for you.
+
+   DOSEMU - http://www.dosemu.org/
+
+BEWARE OF WINAMP! Although it's excellent for MP3s, it is notorious for being
+one of the worst module players in existence; very many modules play wrongly
+with it. There are plug-ins available to improve Winamp's module support, for
+example WSP.
+
+   Winamp - http://www.winamp.com/
+   WSP - http://www.spytech.cz/index.php?sec=demo
+
+(There is a Winamp plug-in that uses DUMB, but it is unreliable. If anyone
+would like to work on it, please get in touch.)
+
+While I am at it I should also point out that Winamp is notorious for
+containing security flaws. Install it at your own risk, and if it is your
+work computer, check with your boss first!
+
+Samples and instruments are the building blocks of music modules. You can
+download samples at
+
+   http://www.tump.net/
+
+If you would like to download module files composed by other people, check
+the following sites:
+
+   http://www.modarchive.com/
+   http://www.scene.org/
+   http://www.tump.net/
+   http://www.homemusic.cc/main.php
+   http://www.modplug.com/
+
+Once again, if you know of more sites where samples or module files are
+available for download, please let me know.
+
+If you wish to use someone's music in your game, please respect the
+composer's wishes. In general, you should ask the composer. Music that has
+been placed in the Public Domain can be used by anyone for anything, but it
+wouldn't do any harm to ask anyway if you know who the author is. In many
+cases the author will be thrilled, so don't hesitate!
+
+A note about converting modules from one format to another, or converting
+from MIDI: don't do it, unless you are a musician and are prepared to go
+through the file and make sure everything sounds the way it should! The
+module formats are all slightly different, and MIDI is very different;
+converting from one format to another will usually do some damage.
+
+Instead, it is recommended that you allow DUMB to interpret the original file
+as it sees fit. DUMB may make mistakes (it does a lot of conversion on
+loading), but future versions of DUMB will be able to rectify these mistakes.
+On the other hand, if you convert the file, the damage is permanent.
+
+
+***********************
+*** Contact details ***
+***********************
+
+
+If you have trouble with DUMB, or want to contact me for any other reason, my
+e-mail address is given below. Please do get in touch, even if I appear to
+have disappeared!
+
+If you wish to chat online about something, perhaps on IRC, that can most
+likely be arranged. Send me an e-mail.
+
+
+******************
+*** Conclusion ***
+******************
+
+
+This is the conclusion.
+
+
+Ben Davis
+entheh@users.sf.net
diff --git a/libraries/dumb/release.txt b/libraries/dumb/release.txt
new file mode 100644
index 000000000..527d44933
--- /dev/null
+++ b/libraries/dumb/release.txt
@@ -0,0 +1,561 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * release.txt - Release notes for DUMB.              / / \  \
+ *                                                   | <  /   \_
+ *                                                   |  \/ /\   /
+ *                                                    \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+
+*******************************************
+*** DUMB v0.9.3, released 7 August 2005 ***
+*******************************************
+
+Hello! Welcome to a long-awaited-or-probably-just-given-up-on-by-everybody
+release! New to this release are lower memory usage, faster mixing loops,
+loading of text fields in the module files, and faster load functions for
+projects that don't need to seek within the module or know its length.
+Additionally, Chad Austin has contributed a dumb2wav tool for converting
+modules to .wav files and updated the Visual Studio 6 project files to
+compile all the examples as well as the library. Users of Unix-like systems
+will be pleased to know that on Chad's suggestion I have made the build
+system cope with variables such as $HOME or ${HOME} in the prefix.
+
+Chad has also contributed an Autotools build system, but neither of us
+recommends its use. The Autotools are an evil black box, we haven't quite
+managed to get it right, and goodness help you if it happens not to work for
+you. The files are available in a separate download if you absolutely need
+them. Notice that that download is almost twice as large as the rest of DUMB!
+
+Maybe we'll do SCons next time.
+
+Thanks to Chad for all his work. Chad is the author of Audiere, a portable
+sound library which has started using DUMB for its module playback! Wahoo!
+
+   http://audiere.sf.net/
+
+There are three main optimisations that went into the mixing loops.
+
+First, I downloaded ModPlugXMMS and had a peek at the mixing code, which is
+Public Domain. It uses look-up tables for the cubic mixing. I pinched the
+idea, and that sped DUMB's cubic (best quality) resamplers up by a factor of
+two or three.
+
+Secondly, the samples loaded from the file are now kept in 8-bit or 16-bit
+format, whereas previously they were being converted to 24-bit-in-32-bit on
+loading. This means the samples occupy a half or a quarter of the memory they
+used to occupy. It also had the side-effect of speeding up the mixing loops,
+but it meant I had to duplicate the resampling code. (It is all done with
+macros in the source code, but it becomes two copies on the binary level.)
+
+Thirdly, stereo samples and stereo mixing buffers are now kept in
+interleaved format, where previously the two channels were done separately to
+keep the code simpler. This change has made the library quite a bit bigger,
+but has made the code run almost twice as fast for stereo output (tested for
+modules whose samples are mostly mono)!
+
+DUMB is now as fast as ModPlugXMMS on my system.
+
+Some people have also commented that DUMB seems to take a long time loading
+files. This is because immediately upon loading the file it runs the playback
+engine over it up as far as the point of first loop, taking snapshots at 30-
+second intervals to be used as references for fast seeking and finally
+storing the playback time. Of course, most games don't need this. You can now
+skip it by calling the _quick versions of the dumb_load_*(), dumb_read_*() or
+dumb_register_dat_*() functions. Should you need the data later, you can call
+dumb_it_do_initial_runthrough() to calculate it. Please note that this cannot
+currently be done safely from a concurrent thread while the music is playing.
+
+As mentioned, DUMB loads the text fields in module files now. You can
+retrieve the song title with duh_get_tag(). Sample names and filenames,
+instrument names and filenames, and the song message for IT files, are
+available with a call to duh_get_it_sigdata() and various dumb_it_sd_*()
+functions. Please note that text fields added as extensions by ModPlug
+Tracker are not supported.
+
+DUMB's timing is ever so slightly more accurate. This is hardly noticeable,
+but it has meant that the length computed will increase very slightly.
+
+There are many small playback fixes in this release:
+
+* The Lxx effect in XM files (set envelope position) is now supported.
+
+* Pattern looping is now correct for XM files. Bizarrely, an ordinary pattern
+  loop whose start point isn't the first row seems to cause the next pattern
+  to start at the row corresponding to the loop start point. That must have
+  been a headache for people creating XM files! Nevertheless, DUMB now
+  emulates this behaviour. If you have an XM file that was written in a
+  tracker other than Fast Tracker II and breaks in DUMB, you can get around
+  it by putting a D00 effect (break to row 0) right at the end of the pattern
+  containing the loop.
+
+* XM pattern looping can be infinite. DUMB should detect this and call the
+  loop callback when it happens. Specifically, it has a loop counter for each
+  channel, so each time it sets or decrements that counter, it remembers the
+  loop end point for that channel. When the loop terminates, the loop end
+  point is reset to 0. If the loop end point ever decreases during a loop,
+  the loop callback is called. If anyone manages to get around this check and
+  prevent DUMB from calling the callback, please let me know and send me an
+  illustrative XM file!
+
+* For IT files, notes are now removed from channels if they have faded out,
+  even if they are still in the foreground. After this has happened, a row
+  with a note and Gxx (tone portamento) specified will cause a new note to
+  start playing, which is what Impulse Tracker does in this scenario.
+  (Normally, Gxx prevents the new note from playing and instead causes the
+  old note to start sliding towards the new note.)
+
+* If a tone portamento command occurred when no note was playing, the effect
+  value wasn't stored. This has been fixed. Thanks to Maim from #trax on
+  EFnet for discovering this bug.
+
+* DUMB now treats the parameter to the undocumented XM key off effect Kxx as
+  a delay, consistent with Fast Tracker II's behaviour. It has also been made
+  not to clear the note, so a subsequent volume command will restore it, as
+  in Fast Tracker II.
+
+* DUMB used to process the first row when you created the
+  DUMB_IT_SIGRENDERER. This happened before you had a chance to install any
+  callbacks. If an F00 effect occurred on the first row, the music would stop
+  immediately and the xm_speed_zero callback would be called if it were
+  present. Unfortunately, it wasn't present, and the algorithm for
+  calculating the length subsequently went into an endless loop while waiting
+  for it. Worse still, the same algorithm accumulated data for fast seeking,
+  and never stopped, so it pretty quickly consumed all the resources. DUMB
+  will now not process the first row until you first request some samples,
+  provided you pass zero for pos. Of course, any MOD or XM file with F00 in
+  the very first row won't do much anyway, but such files won't crash the
+  library now.
+
+* There was a subtle bug that affected a few XM files. For instruments with
+  no associated samples, the array mapping notes to samples is uninitialised.
+  This became a problem if such instruments were then used, which does happen
+  sometimes. On many systems, memory is initialised to zero when first given
+  to a program (for security reasons), so the problem didn't come up most of
+  the time. However, on platforms like DOS where memory isn't initialised, or
+  in programs that reuse memory later on (this includes the XMMS plug-in with
+  which I discovered the bug), a rogue note would occasionally play. This has
+  now been fixed.
+
+* DUMB's envelope handling for IT files was subtly wrong. Upon note off, it
+  stopped obeying the sustain loop points one tick too early. Notes were
+  often shorter than they should have been, and in pathological cases a whole
+  extra iteration of the sustain loop section might have been skipped. The
+  envelope code has now been rewritten. Thanks go to Allefant for Valgrinding
+  the new code!
+
+Finally, there were two build problems in the last version, which were fixed
+in the download marked with -fixed. They are of course correct in this
+version. For the record:
+
+* The make/config.bat file, responsible for generating make/config.txt, wrote
+  a crucial line to the wrong place, causing it to be left out of the file.
+  As a result, the makefile would fail to install everything for Allegro
+  users, and enter infinite recursion for other users. This applied to people
+  using DJGPP and MinGW.
+
+* DUMB's Makefile was supposed to install the example programs on Unix-based
+  platforms, but it wasn't doing. The fix was to edit Makefile and change the
+  one occurrence of $COMSPEC to $(COMSPEC).
+
+That's it! I hope you enjoy this long-awaited-or-probably-just-given-up-on-
+by-everybody release of DUMB!
+
+
+******************************************
+*** DUMB v0.9.2, released 2 April 2003 ***
+******************************************
+
+Yes, there really has been a release. This is not a day-late April fools'
+joke.
+
+DUMB's full name has changed! The old "Dedicated Universal Music
+Bastardisation" was rather silly, and not much more than a forced attempt at
+finding words beginning with D, U, M and B. I spent weeks and weeks browsing
+dictionaries and hopelessly asking others for bright ideas, until the
+brilliant Chris "Kitty Cat" Robinson came up with "Dynamic". I decided to
+keep the U as Universal, since a DUH struct can hold digital music in any
+format. Now all that remained was the B, but it didn't take me long to come
+up with Bibliotheque, which, despite looking French, is indeed considered an
+English word by Oxford English Dictionary Online, to which my university has
+a subscription. So there you have it - the name now makes sense.
+
+The two most significant additions to the project would have to be the new
+thread safety (with an important restriction, detailed in docs/dumb.txt), and
+the new build system. The silly 'makeall' and 'makecore' scripts are gone. If
+you are a GCC user, all you need do now is run 'make' and 'make install', as
+for other projects. You don't even have to run a 'fix' script any more! There
+are some caveats, which are covered in readme.txt. If you use Microsoft
+Visual C++ 6, you no longer need to obtain GCC and GNU Make - there is a
+project file just for you.
+
+Huge thanks go to Steve Terry for testing on Windows XP - about five times -
+and to lillo for testing on BeOS and Mac OS X. Thanks also to X-G for testing
+on a Windows system that has consistently posed problems for DUMB's old
+makefiles.
+
+There was a bug whereby al_poll_duh() would sometimes cause the music to
+resume playing if you called it after al_pause_duh(). Whether this was DUMB's
+fault for misusing Allegro's API, or a bug in Allegro, is unclear, but this
+release makes it work.
+
+In one of my projects, I found that my AL_DUH_PLAYER stopped playing when
+there were lots of other sound effects. In order to fix this, I programmed
+DUMB to set the priority of the stream's voice to 255, the maximum. I also
+added al_duh_set_priority(), so you can set the priority yourself if you need
+to.
+
+The resampling code has undergone a transformation. The bad news is that the
+linear average code is no longer in use. The good news is that where DUMB's
+resamplers used to require three extra samples' worth of memory to be
+allocated and initialised, it now copes with just the sample data. And it
+does a very good job at bouncing off loop points and otherwise hurtling
+around the sample. The resampling code is considerably more complicated, but
+the code that uses the resamplers is considerably simpler - and if you
+noticed a slight click in some bidirectionally looping samples, you'll be
+pleased to know that that click is gone!
+
+I have also devoted some effort to optimisation. It seemed hopeless for a
+while, but then I actually figured out a way of making it faster AND more
+accurate at the same time! DUMB is now quite a bit faster than it was, and it
+mixes not with 16-bit precision, but with 24-bit precision. (It used 32-bit
+integers all along, but the difference is that it now makes use of 256 times
+as much of the integer's range.)
+
+There have been the usual improvements to playback. The last release occurred
+rather too soon after I had fixed the XM effect memories; EAx and EBx, fine
+volume ramps, had been neglected. These are now handled properly.
+
+In previous versions of DUMB, muted channels in IT were actually played with
+surround sound panning (where the right-hand channel is inverted). This has
+been fixed, so muted channels will really be muted now.
+
+There were also some subtle problems with the way DUMB handled New Note
+Actions for IT files. It turned out that, in all releases of DUMB so far,
+pitch, filter and panning envelopes and sample vibrato were not being
+processed for any note that was forced into the background by a new note on
+the same channel! This only affected IT files. Not only has this been fixed,
+but envelope interpolation is much more accurate. Long trailing envelope-
+driven fade-outs sound a lot better now!
+
+Since panning and filter envelopes are more precise, extra fields have been
+added to the DUMB_IT_CHANNEL_STATE struct, used by
+dumb_it_sr_get_channel_state(). These fields hold the 'decimal' parts of the
+pan and filter cut-off. See dumb.txt for details.
+
+Mxx (set channel volume) now correctly only modifies the last note played on
+the channel, not any previous notes that have been forced into the background
+by New Note Actions, and filter effect processing is now closer to what
+Impulse Tracker does.
+
+The XM loader was slightly flawed and could crash on files containing samples
+with out-of-range loop points. One such file was given to me. This has been
+fixed.
+
+Finally, the legal stuff. Julien Cugniere has been added to the list of
+copyright owners. He deserves it, for all the work he did on the XM support!
+And the licence has been changed. You are no longer required to include a
+link to DUMB in a project that uses DUMB; the reasons for this relaxation are
+explained in licence.txt. However, the request is still there ...
+
+As usual, enjoy!
+
+
+**********************************************
+*** DUMB v0.9.1, released 19 December 2002 ***
+**********************************************
+
+Hi again! Lots to say this time, so I shall cut right to the chase.
+
+DUMB now supports Impulse Tracker's low-pass resonant filters! Huge thanks go
+to Jeffrey Lim, author of Impulse Tracker, for giving me what information he
+still had regarding the algorithm; to cut a long story short, modifying
+ModPlug Tracker's source code (which is in the Public Domain) led to an
+algorithm whose output matched Impulse Tracker's perfectly.
+
+Please note that ModPlug Tracker's filters as they stand do not match Impulse
+Tracker's, and I have no interest in supporting ModPlug Tracker's variant
+(especially not the integer rounding problems). Please see docs/modplug.txt,
+new in this release, for details.
+
+Thanks also go to Fatso Huuskonen for motivating me to add filter support,
+and providing me with several great IT files to test it with!
+
+The other important feature added for this release is click removal. Up until
+now, DUMB has generated clicks when cutting notes, starting samples in the
+middle, and so on. This version of DUMB will remove any such clicks. Note
+that DUMB does not use volume ramps to accomplish this; the algorithm will
+not take the bite out of the music!
+
+In other news, DUMB now supports sample vibrato for IT files, and instrument
+vibrato for XM files. A slight bug in New Note Action handling for IT files
+has been fixed; Note Fade will not break the sustain loops of the sample and
+envelope, as it did before. Tremor handling (Ixy) had a strange bug in it,
+which has been fixed.
+
+Support for XM files has been greatly enhanced. The XM envelope handling new
+in the last release contained a huge bug, resulting in notes seeming not to
+stop when they should; this has been fixed. Some XM files crashed DUMB, while
+others failed to load; these problems have been solved. Effect memories now
+work properly for XM and MOD files, to the best of my knowledge. Some other
+differences between IT and XM have been accounted for, most notably the
+Retrigger Note effects, Rxy and E9x.
+
+DUMB's sound quality and accuracy are not the only areas that have been
+enhanced. The API has been expanded, at last. You can now detect when a
+module loops, or make it play through just once. You can ask DUMB to inform
+you every time it generates some samples; this is useful for visualisation.
+For IT files, you can intercept the MIDI messages generated by Zxx macros,
+enabling you to synchronise your game with the music to some extent. (There
+is no such method for XM, S3M or MOD files yet; sorry. Also note that the
+function will be called before you actually hear the sound; I cannot improve
+this until DUMB has its own sound drivers, which won't be for a while.) You
+can query the current order and row. Finally, operations like changing the
+speed and tempo are now possible, and you can query the playback state on
+each channel.
+
+Some parts of DUMB's API have been deprecated. Simple programs that use
+Allegro will be unaffected, but if you get some compiler warnings or errors,
+please review docs/deprec.txt. This file explains why those parts of the API
+were deprecated, and tells you how to adapt your code; the changes you need
+to make are straightforward. Sorry for the inconvenience.
+
+For various reasons, I have made DUMB's makefiles use different compiler
+flags depending on your GCC version (unless you are using MSVC). There is no
+elegant way of getting the makefiles to detect when GCC is upgraded. If you
+upgrade GCC, you should execute 'make clean' in order to make DUMB detect the
+GCC version again. Otherwise you may get some annoying error messages. (It is
+wise to do this in any case, so that all the object files are built with the
+same GCC version.)
+
+DUMB's example players have been unified into a single player called
+'dumbplay'. The player has been enhanced to display messages when the music
+loops, and when XM and MOD files freeze (effect F00; more information on this
+in docs/howto.txt).
+
+Finally, as noted on DUMB's website, the release notes from the last release
+were inaccurate. It has been verified that DUMBOGG v0.5 does still work with
+that release, and still works with this release. The esoteric DUMBOGG v0.6
+has not been created yet, since DUMBOGG v0.5 still works.
+
+Please scroll down and read through the indented paragraphs in the notes for
+the last release; they are relevant for this release too.
+
+That's all folks! Until next time.
+
+
+*******************************************
+*** DUMB v0.9, released 16 October 2002 ***
+*******************************************
+
+MOD support is here! DUMB now supports all four of the common module formats.
+As usual, there have also been some improvements to the way modules are
+played back. Most notably, handling of tone portamento in IT files has been
+improved a lot, and XM envelopes are now processed correctly.
+
+The other major change is that DUMB now does a dummy run through each module
+on loading. It stores the playback state at thirty-second intervals. It stops
+when the module first loops, and then stores the playback time. This results
+in a slightly longer load time and a greater memory overhead, but seeking is
+faster (to any point before the module first loops) and the length is
+calculated! duh_get_length() will return this and is now documented in
+docs/howto.txt and docs/dumb.txt.
+
+DUMB's build process has been changed to use 'mingw' wherever it used
+'mingw32' before; some directories have been renamed, and the 'fix' command
+you had to run for MinGW has been changed from 'fix mingw32' to 'fix mingw'.
+
+Last time, I directed you to scroll down and read the notes from a past
+release, but ignore this point, and that point applies to something else, and
+so on. Did anyone do so? Well, if you're reading this at all, you probably
+did. Nevertheless, this time I shall be much less confusing and restate any
+relevant information. So the least you can do is read it!
+
+- If your program ever aborts with exit code 37 while loading an IT file,
+  PLEASE LET ME KNOW! The IT file in question has a stereo compressed sample
+  in it, and the format is unspecified for this case (Impulse Tracker itself
+  doesn't use stereo samples at all). I will need the IT file in question,
+  and any information you can give me about how the IT file was created (e.g.
+  what program). (If you don't get to see an exit code, let me know anyway.)
+
+- If your program ever outputs a line resembling "Inst 01 Env: 0,64 8,32
+  15,48" to stderr while loading an IT file, PLEASE LET ME KNOW! You have an
+  old IT file (saved by an Impulse Tracker version older than 2.00), and
+  support for such files is STILL untested.
+
+- Not all parts of DUMB's API are documented yet. You will find some
+  functions in dumb.h which are not listed in docs/dumb.txt; the reason is
+  that these functions still need work and will probably change. If you
+  really, really want to use them, talk to me first (IRC EFnet #dumb is a
+  good place for this; see readme.txt for details on using IRC). I intend to
+  finalise and document the whole of DUMB's API for Version 1.0.
+
+There have been some changes to the naming conventions in DUMB's undocumented
+API. DUMBOGG v0.5 will not work with this and subsequent releases of DUMB;
+please upgrade to DUMBOGG v0.6. These changes should not break anything in
+your own code, since you didn't use those parts of the API, did you ;)
+
+There is still a great deal of work to be done before DUMB's API can be
+finalised, and thus it will be a while before DUMB v1.0 comes out. It should
+be worth the wait. In the meantime, there will be 0.9.x releases with
+additional functionality, improved playback, and possibly support for some
+extra file formats.
+
+Finally I should like to offer an apology; there is a strong possibility that
+some of DUMB's official API will change in the near future. There will not be
+any drastic changes, and the corresponding changes to your source code will
+be simple enough. If I didn't make these changes, DUMB's API would start to
+become limited, or messy, or both, so it's for the better. I apologise in
+advance for this.
+
+Now scroll down and read the notes for the first r... oh wait, we already did
+that. I guess that's it then. You can stop reading now.
+
+Right after you've read this.
+
+And this.
+
+Off you go.
+
+Bye.
+
+
+********************************************
+*** DUMB v0.8.1, released 11 August 2002 ***
+********************************************
+
+This is a minor release that fixes a few bugs. One of these bugs, however,
+was pretty serious. dumb_register_dat_xm() was never coded! It was prototyped
+in aldumb.h, so code would compile, but there would be an unresolved symbol
+at the linking stage. This has been fixed.
+
+Platforms other than Unix did not have a working 'make veryclean' target;
+this has been fixed. In addition, the makefiles now use 'xcopy' instead of
+'copy', since on some systems GNU Make seems to have trouble calling commands
+built in to the shell.
+
+Contrary to the errata that was on the DUMB website, the makeall.sh and
+makecore.sh scripts actually DID install in /usr. This has now been
+corrected, and regardless of whether you use these scripts or call make
+directly, the files will now be installed to /usr/local by default.
+
+The XM loader used to treat stereo samples as mono samples with the data for
+the right channel positioned after the data for the left channel. This
+generally resulted in an unwanted echo effect. This has been fixed.
+
+When playing XM files, specifying an invalid instrument would cause an old
+note on that channel to come back (roughly speaking). Fast Tracker 2 does not
+exhibit this behaviour. This has been fixed.
+
+The GCC makefiles used -mpentium, which is deprecated in gcc 3.x. This was
+generating warnings, and has now been fixed.
+
+In XM files, the length of a sample is stored in bytes. DUMB was assuming
+that the length of a 16-bit sample would be even. I had two XM files where
+this was not the case, and DUMB was unable to load them. This has been fixed.
+
+In order to accommodate the extra part of the version number,
+DUMB_REVISION_VERSION has been added. DUMB_VERSION has also been added in
+order to facilitate checking if the version of DUMB installed is sufficient.
+See docs/dumb.txt for details.
+
+As a last-minute fix, the XM "Break to row" effect is now loaded properly. It
+was necessary to convert from binary-coded decimal to hexadecimal (those who
+have experience with Fast Tracker 2 will know what I mean). In short, this
+means the effect will now work properly when breaking to row 10 or greater.
+
+DUMB v0.8 had faulty release date constants; DUMB_MONTH and DUMB_DAY were
+swapped! For this reason, DUMB_DATE should not be compared against any date
+in 2002. This note has been added to docs/dumb.txt and also to dumb.h.
+
+Please scroll to the end and read the release notes for the first version,
+DUMB v0.7. Most of them apply equally to this release. However, the
+non-portable code was rewritten for DUMB v0.8, so that point does not apply.
+The point about length not being calculated also applies to XM files.
+
+Enjoy :)
+
+
+****************************************
+*** DUMB v0.8, released 14 June 2002 ***
+****************************************
+
+Welcome to the second release of DUMB!
+
+In addition to these notes, please read below the release notes for the
+previous version, DUMB v0.7. Most of them apply equally to this release.
+However, the non-portable code has been rewritten; DUMB should now port to
+big-endian platforms.
+
+The main improvement in this release of DUMB is the support for XM files.
+Enormous thanks go to Julien Cugniere for working on this while I had to
+revise for my exams!
+
+There was a mistake in the makefiles in the last release. The debugging
+Allegro interface library was mistakenly named libaldmbd.a instead of
+libaldmd.a, meaning you had to compile with -laldmbd, contrary to what the
+docs said. Apologies to everyone who lost sleep trying to work out what was
+wrong! The reason for using libaldmd.a is to maintain compatibility with
+plain DOS, where filenames are limited to eight characters (plus a three-
+letter extension). The makefiles have now been changed to match the
+information in the docs, so you may have to alter your project files
+accordingly.
+
+The example programs were faulty, and crashed on Windows if they were unable
+to load the file. It was also difficult to work out how to exit them (you had
+to click the taskbar button that didn't have a window, then press a key).
+They have been improved in both these respects.
+
+I have now added a docs/faq.txt file (Frequently Asked Questions), which is
+based on problems and misconceptions people have had with the first release.
+Please refer to it before contacting me with problems.
+
+Thanks to networm for touching up the Unix makefile and writing the
+instructions on using it.
+
+Incidentally, today (Friday 14 June) is the Robinson College May Ball at
+Cambridge Uni. God knows why it's called a May Ball if it's in June. I'm not
+going myself (72 GBP, and I'd have to wear a suit, ugh), but with all the
+noise outside I shall enjoy pumping up the speakers tonight!
+
+
+****************************************
+*** DUMB v0.7, released 2 March 2002 ***
+****************************************
+
+This is the first release of DUMB, and parts of the library are not
+crystallised. Don't let this put you off! Provided you don't try to use any
+features that aren't documented in docs/dumb.txt, the library should be rock
+solid and you should be able to upgrade more or less without problems.
+
+Here are some notes on this release:
+
+- There is some non-portable code in this release of DUMB. It is likely that
+  the library will fail to load IT files with compressed samples on
+  big-endian machines such as the Apple Macintosh.
+
+- If your program ever aborts with exit code 37 while loading an IT file,
+  PLEASE LET ME KNOW! The IT file in question has a stereo compressed sample
+  in it, and the format is unspecified for this case (Impulse Tracker itself
+  doesn't use stereo samples at all). I will need the IT file in question,
+  and any information you can give me about how the IT file was created (e.g.
+  what program). (If you don't get to see an exit code, let me know anyway.)
+
+- If your program ever outputs a line resembling "Inst 01 Env: 0,64 8,32
+  15,48" to stderr while loading an IT file, PLEASE LET ME KNOW! You have an
+  old IT file (saved by an Impulse Tracker version older than 2.00), and
+  support for such files is untested.
+
+- The length of IT and S3M files is not currently calculated. It is just set
+  to ten minutes.
diff --git a/libraries/dumb/src/core/atexit.c b/libraries/dumb/src/core/atexit.c
new file mode 100644
index 000000000..16c6abdb2
--- /dev/null
+++ b/libraries/dumb/src/core/atexit.c
@@ -0,0 +1,71 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * atexit.c - Library Clean-up Management.            / / \  \
+ *                                                   | <  /   \_
+ * By entheh.                                        |  \/ /\   /
+ *                                                    \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+
+#include "dumb.h"
+#include "internal/dumb.h"
+
+
+/* One entry in the singly linked list of registered clean-up procedures. */
+typedef struct DUMB_ATEXIT_PROC
+{
+	struct DUMB_ATEXIT_PROC *next; /* Following entry, or NULL at the end. */
+	void (*proc)(void); /* Procedure that dumb_exit() will call. */
+}
+DUMB_ATEXIT_PROC;
+
+
+/* Head of the list of registered procedures; NULL while the list is empty. */
+static DUMB_ATEXIT_PROC *dumb_atexit_proc = NULL;
+
+/* Registers proc to be called by dumb_exit(). Returns 0 if proc is now (or was
+ * already) registered, or -1 if the list entry could not be allocated. */
+int dumb_atexit(void (*proc)(void))
+{
+	DUMB_ATEXIT_PROC *node;
+
+	/* A procedure that is already in the list is not added again. */
+	for (node = dumb_atexit_proc; node != NULL; node = node->next) {
+		if (node->proc == proc)
+			return 0;
+	}
+
+	node = malloc(sizeof(*node));
+	if (node == NULL)
+		return -1;
+
+	/* Push onto the front, so dumb_exit() reaches the newest entry first. */
+	node->proc = proc;
+	node->next = dumb_atexit_proc;
+	dumb_atexit_proc = node;
+	return 0;
+}
+
+/* Calls every registered procedure, newest first, and frees the list. */
+void dumb_exit(void)
+{
+	while (dumb_atexit_proc) {
+		DUMB_ATEXIT_PROC *dap = dumb_atexit_proc;
+		/* Unlink before calling, so proc may itself use dumb_atexit(). */
+		dumb_atexit_proc = dap->next;
+		(*dap->proc)();
+		free(dap);
+	}
+}
diff --git a/libraries/dumb/src/core/duhlen.c b/libraries/dumb/src/core/duhlen.c
new file mode 100644
index 000000000..4570f1508
--- /dev/null
+++ b/libraries/dumb/src/core/duhlen.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * duhlen.c - Functions to set and return the         / / \  \
+ *            length of a DUH.                       | <  /   \_
+ *                                                   |  \/ /\   /
+ * By entheh.                                         \_  /  > /
+ *                                                      | \ / /
+ * Note that the length of a DUH is a constant          |  ' /
+ * stored in the DUH struct and in the DUH disk          \__/
+ * format. It will be calculated on loading for
+ * other formats in which the length is not explicitly stored. Also note that
+ * it does not necessarily correspond to the length of time for which the DUH
+ * will generate samples. Rather it represents a suitable point for a player
+ * such as Winamp to stop, and in any good DUH it will allow for any final
+ * flourish to fade out and be appreciated.
+ */
+
+#include "dumb.h"
+#include "internal/dumb.h"
+
+/* Returns the length stored in duh, or 0 if duh is NULL. */
+int32 DUMBEXPORT duh_get_length(DUH *duh)
+{
+	if (!duh) return 0;
+	return duh->length;
+}
+
+/* Stores length in duh; a NULL duh is ignored. */
+void DUMBEXPORT duh_set_length(DUH *duh, int32 length)
+{
+	if (!duh)
+		return;
+	duh->length = length;
+}
diff --git a/libraries/dumb/src/core/duhtag.c b/libraries/dumb/src/core/duhtag.c
new file mode 100644
index 000000000..95664d58b
--- /dev/null
+++ b/libraries/dumb/src/core/duhtag.c
@@ -0,0 +1,38 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * duhtag.c - Function to return the tags stored      / / \  \
+ *            in a DUH struct (typically author      | <  /   \_
+ *            information).                          |  \/ /\   /
+ *                                                    \_  /  > /
+ * By entheh.                                           | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <string.h>
+
+#include "dumb.h"
+#include "internal/dumb.h"
+
+/* Looks up key among the tags of duh and returns the matching value, or NULL
+ * if duh is NULL, has no tags, or holds no tag with that key. */
+const char *DUMBEXPORT duh_get_tag(DUH *duh, const char *key)
+{
+	int n;
+
+	ASSERT(key);
+	if (duh == NULL || duh->tag == NULL)
+		return NULL;
+	for (n = 0; n < duh->n_tags; n++)
+		if (!strcmp(key, duh->tag[n][0]))
+			return duh->tag[n][1];
+	return NULL;
+}
diff --git a/libraries/dumb/src/core/dumbfile.c b/libraries/dumb/src/core/dumbfile.c
new file mode 100644
index 000000000..f0876b752
--- /dev/null
+++ b/libraries/dumb/src/core/dumbfile.c
@@ -0,0 +1,418 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * dumbfile.c - Hookable, strictly sequential         / / \  \
+ *              file input functions.                | <  /   \_
+ *                                                   |  \/ /\   /
+ * By entheh.                                         \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+
+#include "dumb.h"
+
+
+
+static const DUMBFILE_SYSTEM *the_dfs = NULL; /* file system used by dumbfile_open(); set by register_dumbfile_system() */
+
+/* Installs dfs as the file system used by dumbfile_open(). All of the open,
+ * getc, close, seek and get_size hooks must be provided. */
+void DUMBEXPORT register_dumbfile_system(const DUMBFILE_SYSTEM *dfs)
+{
+	ASSERT(dfs);
+	ASSERT(dfs->open);
+	ASSERT(dfs->getc);
+	ASSERT(dfs->close);
+	ASSERT(dfs->seek);
+	ASSERT(dfs->get_size);
+	the_dfs = dfs;
+}
+
+
+
+#include "internal/dumbfile.h"
+
+/* Opens filename through the registered default file system. Returns a new
+ * DUMBFILE positioned at 0, or NULL if allocation or the open hook fails.
+ */
+DUMBFILE *DUMBEXPORT dumbfile_open(const char *filename)
+{
+	DUMBFILE *df;
+
+	ASSERT(the_dfs);
+
+	df = (DUMBFILE *) malloc(sizeof(*df));
+	if (!df)
+		return NULL;
+
+	df->dfs = the_dfs;
+	df->pos = 0;
+
+	df->file = (*the_dfs->open)(filename);
+
+	if (df->file)
+		return df;
+
+	/* The open hook failed, so the wrapper is of no use. */
+	free(df);
+
+	return NULL;
+}
+
+/* Wraps an already-open file handle in a DUMBFILE that uses the hooks in dfs.
+ * If the wrapper cannot be allocated, file is closed through dfs->close (when
+ * that hook is present) and NULL is returned.
+ */
+DUMBFILE *DUMBEXPORT dumbfile_open_ex(void *file, const DUMBFILE_SYSTEM *dfs)
+{
+	DUMBFILE *df;
+
+	ASSERT(dfs);
+	ASSERT(dfs->getc);
+	ASSERT(file);
+
+	df = (DUMBFILE *) malloc(sizeof(*df));
+	if (df) {
+		df->dfs = dfs;
+		df->file = file;
+		df->pos = 0;
+		return df;
+	}
+
+	if (dfs->close)
+		(*dfs->close)(file);
+
+	return NULL;
+}
+
+/* Returns the current position of f, which is negative once f is in error. */
+int32 DUMBEXPORT dumbfile_pos(DUMBFILE *f)
+{
+	int32 pos;
+	ASSERT(f);
+	pos = f->pos;
+	return pos;
+}
+
+/* Skips n bytes of f, using the skip hook if the file system has one and
+ * otherwise reading the bytes one at a time. Returns 0 on success; on failure
+ * f is put in the error state and the failing hook's result is returned.
+ */
+int DUMBEXPORT dumbfile_skip(DUMBFILE *f, long n)
+{
+	int status = 0;
+
+	ASSERT(f);
+	ASSERT(n >= 0);
+
+	if (f->pos < 0)
+		return -1;
+
+	f->pos += n;
+
+	if (f->dfs->skip) {
+		status = (*f->dfs->skip)(f->file, n);
+	} else {
+		for (; n; n--) {
+			status = (*f->dfs->getc)(f->file);
+			if (status < 0)
+				break;
+			status = 0;
+		}
+	}
+
+	/* Any non-zero status at this point is a failure. */
+	if (status)
+		f->pos = -1;
+
+	return status;
+}
+
+/* Reads one byte from f through the getc hook. Returns the hook's result; a
+ * negative result puts f in the error state.
+ */
+int DUMBEXPORT dumbfile_getc(DUMBFILE *f)
+{
+	int c;
+
+	ASSERT(f);
+
+	if (f->pos < 0)
+		return -1;
+
+	c = (*f->dfs->getc)(f->file);
+
+	/* Track the position, or flag the error. */
+	if (c >= 0)
+		f->pos++;
+	else
+		f->pos = -1;
+
+	return c;
+}
+
+/* Reads two bytes from f, low byte first, and returns lo | (hi << 8). On
+ * failure f enters the error state and the negative result of the failing
+ * read is returned.
+ */
+int DUMBEXPORT dumbfile_igetw(DUMBFILE *f)
+{
+	int lo, hi;
+
+	ASSERT(f);
+
+	if (f->pos < 0)
+		return -1;
+
+	lo = (*f->dfs->getc)(f->file);
+	if (lo >= 0) {
+		hi = (*f->dfs->getc)(f->file);
+		if (hi >= 0) {
+			f->pos += 2;
+			return lo | (hi << 8);
+		}
+		lo = hi;
+	}
+
+	/* lo now holds the result of whichever read failed. */
+	f->pos = -1;
+	return lo;
+}
+
+/* Reads two bytes from f, high byte first, and returns lo | (hi << 8). On
+ * failure f enters the error state and the negative result of the failing
+ * read is returned.
+ */
+int DUMBEXPORT dumbfile_mgetw(DUMBFILE *f)
+{
+	int hi, lo;
+
+	ASSERT(f);
+
+	if (f->pos < 0)
+		return -1;
+
+	hi = (*f->dfs->getc)(f->file);
+	if (hi >= 0) {
+		lo = (*f->dfs->getc)(f->file);
+		if (lo >= 0) {
+			f->pos += 2;
+			return lo | (hi << 8);
+		}
+		hi = lo;
+	}
+
+	/* hi now holds the result of whichever read failed. */
+	f->pos = -1;
+	return hi;
+}
+
+/* Reads four bytes from f, least significant byte first, and returns them
+ * combined into one value. If any read fails, f enters the error state and
+ * the result of the failing read is returned. Returns -1 if f was already in
+ * the error state.
+ */
+int32 DUMBEXPORT dumbfile_igetl(DUMBFILE *f)
+{
+	uint32 value, byte;
+
+	ASSERT(f);
+
+	if (f->pos < 0)
+		return -1;
+
+	byte = (*f->dfs->getc)(f->file);
+	if ((sint32)byte < 0)
+		goto fail;
+	value = byte;
+
+	byte = (*f->dfs->getc)(f->file);
+	if ((sint32)byte < 0)
+		goto fail;
+	value |= byte << 8;
+
+	byte = (*f->dfs->getc)(f->file);
+	if ((sint32)byte < 0)
+		goto fail;
+	value |= byte << 16;
+
+	byte = (*f->dfs->getc)(f->file);
+	if ((sint32)byte < 0)
+		goto fail;
+	value |= byte << 24;
+
+	f->pos += 4;
+
+	return value;
+
+fail:
+	f->pos = -1;
+	return byte;
+}
+
+/* Reads four bytes from f, most significant byte first, and returns them
+ * combined into one value. If any read fails, f enters the error state and
+ * the result of the failing read is returned. Returns -1 if f was already in
+ * the error state.
+ *
+ */
+int32 DUMBEXPORT dumbfile_mgetl(DUMBFILE *f)
+{
+	uint32 value, byte;
+
+	ASSERT(f);
+
+	if (f->pos < 0)
+		return -1;
+
+	byte = (*f->dfs->getc)(f->file);
+	if ((sint32)byte < 0)
+		goto fail;
+	value = byte << 24;
+
+	byte = (*f->dfs->getc)(f->file);
+	if ((sint32)byte < 0)
+		goto fail;
+	value |= byte << 16;
+
+	byte = (*f->dfs->getc)(f->file);
+	if ((sint32)byte < 0)
+		goto fail;
+	value |= byte << 8;
+
+	byte = (*f->dfs->getc)(f->file);
+	if ((sint32)byte < 0)
+		goto fail;
+	value |= byte;
+
+	f->pos += 4;
+
+	return value;
+
+fail:
+	f->pos = -1;
+	return byte;
+}
+
+/* Reads a variable-length unsigned number from f: each byte contributes its
+ * low seven bits, most significant group first, and a set top bit means that
+ * another byte follows. A failed read returns dumbfile_getc()'s result.
+ */
+uint32 DUMBEXPORT dumbfile_cgetul(DUMBFILE *f)
+{
+	uint32 value = 0;
+
+	for (;;) {
+		int c = dumbfile_getc(f);
+
+		if (c < 0)
+			return c;
+
+		value = (value << 7) | (c & 0x7F);
+		if (!(c & 0x80))
+			return value;
+	}
+}
+
+/* Reads a number with dumbfile_cgetul() and rotates it right by one bit, so
+ * that the lowest stored bit ends up as the top bit. If f is in the error
+ * state afterwards, the unrotated result is returned instead.
+ */
+sint32 DUMBEXPORT dumbfile_cgetsl(DUMBFILE *f)
+{
+	uint32 raw = dumbfile_cgetul(f);
+	uint32 rotated = (raw >> 1) | (raw << 31);
+
+	return f->pos < 0 ? raw : rotated;
+}
+
+/* Reads up to n bytes from f into ptr, using the getnc hook if present and
+ * byte-by-byte reads otherwise. Returns the number of bytes read; if fewer
+ * than n arrive, f is put in the error state. Returns -1 if f already was.
+ */
+int32 DUMBEXPORT dumbfile_getnc(char *ptr, int32 n, DUMBFILE *f)
+{
+	int32 got;
+
+	ASSERT(f);
+	ASSERT(n >= 0);
+
+	if (f->pos < 0)
+		return -1;
+
+	if (!f->dfs->getnc) {
+		for (got = 0; got < n; got++) {
+			int c = (*f->dfs->getc)(f->file);
+			if (c < 0)
+				break;
+			ptr[got] = c;
+		}
+	} else {
+		got = (*f->dfs->getnc)(ptr, n, f->file);
+	}
+
+	if (got < n) {
+		f->pos = -1;
+		return MAX(got, 0);
+	}
+
+	f->pos += got;
+	return got;
+}
+
+/* Moves f to offset n, taken relative to the current position for
+ * DFS_SEEK_CUR, to the file size for DFS_SEEK_END, and used as-is for any
+ * other origin. f->pos is updated first; the seek hook's result is returned. */
+int DUMBEXPORT dumbfile_seek(DUMBFILE *f, long n, int origin)
+{
+	if (origin == DFS_SEEK_CUR)
+		n += f->pos;
+	else if (origin == DFS_SEEK_END)
+		n += (*f->dfs->get_size)(f->file);
+	f->pos = n;
+	return (*f->dfs->seek)(f->file, n);
+}
+
+/* Returns whatever the get_size hook of f's file system reports. */
+int32 DUMBEXPORT dumbfile_get_size(DUMBFILE *f)
+{
+	const DUMBFILE_SYSTEM *dfs = f->dfs;
+	return (*dfs->get_size)(f->file);
+}
+
+/* Returns 1 if f is in the error state (negative position), else 0. */
+int DUMBEXPORT dumbfile_error(DUMBFILE *f)
+{
+	ASSERT(f);
+	if (f->pos < 0)
+		return 1;
+	return 0;
+}
+
+/* Closes f through the close hook (if any) and frees the wrapper. Returns
+ * non-zero if f was in the error state when it was closed.
+ */
+int DUMBEXPORT dumbfile_close(DUMBFILE *f)
+{
+	int had_error;
+
+	ASSERT(f);
+
+	had_error = (f->pos < 0);
+
+	if (f->dfs->close != NULL)
+		(*f->dfs->close)(f->file);
+
+	free(f);
+	return had_error;
+}
diff --git a/libraries/dumb/src/core/loadduh.c b/libraries/dumb/src/core/loadduh.c
new file mode 100644
index 000000000..2891298f9
--- /dev/null
+++ b/libraries/dumb/src/core/loadduh.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadduh.c - Code to read a DUH from a file,        / / \  \
+ *             opening and closing the file for      | <  /   \_
+ *             you.                                  |  \/ /\   /
+ *                                                    \_  /  > /
+ * By entheh.                                           | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/dumb.h"
+
+
+
+/* load_duh(): opens filename, reads a DUH from it with read_duh() and closes
+ * the file again. Returns the DUH, or NULL if the file cannot be opened or
+ * read. When you have finished with it, you must pass the pointer to
+ * unload_duh() so that the memory can be freed.
+ */
+DUH *DUMBEXPORT load_duh(const char *filename)
+{
+	DUMBFILE *f;
+	DUH *duh = NULL;
+
+	f = dumbfile_open(filename);
+	if (f) {
+		duh = read_duh(f);
+		dumbfile_close(f);
+	}
+
+	return duh;
+}
diff --git a/libraries/dumb/src/core/makeduh.c b/libraries/dumb/src/core/makeduh.c
new file mode 100644
index 000000000..1c2695cfb
--- /dev/null
+++ b/libraries/dumb/src/core/makeduh.c
@@ -0,0 +1,151 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * makeduh.c - Function to construct a DUH from       / / \  \
+ *             its components.                       | <  /   \_
+ *                                                   |  \/ /\   /
+ * By entheh.                                         \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "dumb.h"
+#include "internal/dumb.h"
+
+/* Allocates a DUH_SIGNAL binding sigdata to its type descriptor. On failure
+ * the sigdata is released through desc->unload_sigdata (when both exist) and
+ * NULL is returned.
+ */
+static DUH_SIGNAL *make_signal(DUH_SIGTYPE_DESC *desc, sigdata_t *sigdata)
+{
+	DUH_SIGNAL *sig;
+
+	ASSERT((desc->start_sigrenderer && desc->end_sigrenderer) || (!desc->start_sigrenderer && !desc->end_sigrenderer));
+	ASSERT(desc->sigrenderer_generate_samples && desc->sigrenderer_get_current_sample);
+
+	sig = malloc(sizeof(*sig));
+	if (sig) {
+		sig->desc = desc;
+		sig->sigdata = sigdata;
+		return sig;
+	}
+
+	if (desc->unload_sigdata && sigdata)
+		(*desc->unload_sigdata)(sigdata);
+
+	return NULL;
+}
+
+/* make_duh(): builds a DUH of the given length from n_signals desc/sigdata pairs and n_tags
+ * key/value string pairs. Returns NULL if the DUH or any of its signals cannot be allocated. */
+DUH *make_duh(
+	int32 length,
+	int n_tags,
+	const char *const tags[][2],
+	int n_signals,
+	DUH_SIGTYPE_DESC *desc[],
+	sigdata_t *sigdata[]
+)
+{
+	DUH *duh = malloc(sizeof(*duh));
+	int i;
+	int fail;
+
+	if (duh) {
+		duh->n_signals = n_signals;
+
+		duh->signal = malloc(n_signals * sizeof(*duh->signal));
+
+		if (!duh->signal) {
+			free(duh);
+			duh = NULL;
+		}
+	}
+	/* Without a DUH nothing will hold the sigdata, so release each one here. */
+	if (!duh) {
+		for (i = 0; i < n_signals; i++)
+			if (desc[i]->unload_sigdata)
+				if (sigdata[i])
+					(*desc[i]->unload_sigdata)(sigdata[i]);
+		return NULL;
+	}
+	/* No tags yet; they are attached at the end, once the signals exist. */
+	duh->n_tags = 0;
+	duh->tag = NULL;
+
+	fail = 0;
+	/* make_signal() releases the sigdata itself when it fails, so every entry is attempted. */
+	for (i = 0; i < n_signals; i++) {
+		duh->signal[i] = make_signal(desc[i], sigdata[i]);
+		if (!duh->signal[i])
+			fail = 1;
+	}
+
+	if (fail) {
+		unload_duh(duh);
+		return NULL;
+	}
+
+	duh->length = length;
+	/* Copy all tag strings into one allocation that duh->tag[i][0] and [i][1] point into. */
+	{
+		int mem = n_tags * 2; /* account for NUL terminators here */
+		char *ptr;
+
+		for (i = 0; i < n_tags; i++)
+			mem += (int)(strlen(tags[i][0]) + strlen(tags[i][1]));
+
+		if (mem <= 0) return duh;
+		/* From here on an allocation failure returns the DUH without any tags. */
+		duh->tag = malloc(n_tags * sizeof(*duh->tag));
+		if (!duh->tag) return duh;
+		duh->tag[0][0] = malloc(mem);
+		if (!duh->tag[0][0]) {
+			free(duh->tag);
+			duh->tag = NULL;
+			return duh;
+		}
+		duh->n_tags = n_tags;
+		ptr = duh->tag[0][0];
+		for (i = 0; i < n_tags; i++) {
+			duh->tag[i][0] = ptr;
+			strcpy(ptr, tags[i][0]);
+			ptr += strlen(tags[i][0]) + 1;
+			duh->tag[i][1] = ptr;
+			strcpy(ptr, tags[i][1]);
+			ptr += strlen(tags[i][1]) + 1;
+		}
+	}
+
+	return duh;
+}
+
+/* Prepends a signal built from desc and sigdata to duh; returns 0, or -1 with duh left unchanged. */
+int DUMBEXPORT duh_add_signal(DUH *duh, DUH_SIGTYPE_DESC *desc, sigdata_t *sigdata)
+{
+	DUH_SIGNAL **signal, *added;
+
+	if ( !duh || !desc || !sigdata ) return -1;
+	/* Build the signal before touching the array, so a failure cannot leave a NULL entry at index 0. */
+	added = make_signal( desc, sigdata ); /* on failure make_signal() has already unloaded sigdata */
+	if ( !added ) return -1;
+	signal = ( DUH_SIGNAL ** ) realloc( duh->signal, ( duh->n_signals + 1 ) * sizeof( *duh->signal ) );
+	if ( !signal ) { free( added ); return -1; } /* sigdata stays with the caller, as before */
+	duh->signal = signal;
+
+	memmove( signal + 1, signal, duh->n_signals * sizeof( *signal ) );
+	signal[ 0 ] = added;
+	duh->n_signals++;
+	return 0;
+}
diff --git a/libraries/dumb/src/core/rawsig.c b/libraries/dumb/src/core/rawsig.c
new file mode 100644
index 000000000..1651d06bc
--- /dev/null
+++ b/libraries/dumb/src/core/rawsig.c
@@ -0,0 +1,58 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * rawsig.c - Function to retrieve raw signal         / / \  \
+ *            data from a DUH provided you know      | <  /   \_
+ *            what type of signal it is.             |  \/ /\   /
+ *                                                    \_  /  > /
+ * By entheh.                                           | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+
+#include "dumb.h"
+#include "internal/dumb.h"
+
+
+
+/* Returns the sigdata of a signal in duh, provided its type matches 'type' -
+ * naming the type proves you know what to do with the pointer. A sig of zero
+ * or more selects that one signal; a negative sig picks the first signal of
+ * the requested type. If nothing matches, you can expect NULL back.
+ */
+sigdata_t *DUMBEXPORT duh_get_raw_sigdata(DUH *duh, int sig, int32 type)
+{
+	DUH_SIGNAL *signal;
+	int i;
+
+	if (!duh) return NULL;
+
+	/* A non-negative sig names one particular signal. */
+	if (sig >= 0) {
+		if ((unsigned int)sig >= (unsigned int)duh->n_signals)
+			return NULL;
+
+		signal = duh->signal[sig];
+		if (!signal || signal->desc->type != type)
+			return NULL;
+		return signal->sigdata;
+	}
+
+	/* Otherwise take the first signal of the requested type. */
+	for (i = 0; i < duh->n_signals; i++) {
+		signal = duh->signal[i];
+		if (signal && signal->desc->type == type)
+			return signal->sigdata;
+	}
+
+	return NULL;
+}
diff --git a/libraries/dumb/src/core/readduh.c b/libraries/dumb/src/core/readduh.c
new file mode 100644
index 000000000..4c40c98e9
--- /dev/null
+++ b/libraries/dumb/src/core/readduh.c
@@ -0,0 +1,107 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readduh.c - Code to read a DUH from an open        / / \  \
+ *             file.                                 | <  /   \_
+ *                                                   |  \/ /\   /
+ * By entheh.                                         \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+
+#include "dumb.h"
+#include "internal/dumb.h"
+
+/* Reads one signal from f: a four-byte type code, followed by whatever data
+ * the registered descriptor for that type loads. Returns the new signal, or
+ * NULL if allocation fails, the read fails, the type is not registered, or
+ * the descriptor's loader returns NULL.
+ */
+static DUH_SIGNAL *read_signal(DUH *duh, DUMBFILE *f)
+{
+	DUH_SIGNAL *sig = malloc(sizeof(*sig));
+	int32 type;
+
+	if (!sig)
+		return NULL;
+
+	type = dumbfile_mgetl(f);
+	if (dumbfile_error(f))
+		goto fail;
+
+	/* The type must have been registered beforehand. */
+	sig->desc = _dumb_get_sigtype_desc(type);
+	if (!sig->desc)
+		goto fail;
+
+	/* Descriptors without a loader carry no sigdata. */
+	sig->sigdata = NULL;
+	if (sig->desc->load_sigdata) {
+		sig->sigdata = (*sig->desc->load_sigdata)(duh, f);
+		if (!sig->sigdata)
+			goto fail;
+	}
+
+	return sig;
+
+fail:
+	free(sig);
+	return NULL;
+}
+
+/* read_duh(): reads a DUH from an already open DUMBFILE, and returns its
+ * pointer, or null on error. The file is not closed.
+ */
+DUH *DUMBEXPORT read_duh(DUMBFILE *f)
+{
+	DUH *duh;
+	int i;
+
+	if (dumbfile_mgetl(f) != DUH_SIGNATURE)
+		return NULL;
+
+	duh = malloc(sizeof(*duh));
+	if (!duh)
+		return NULL;
+
+	/* No tags are read here; duh_get_tag() reads these fields, so they must not stay uninitialised. */
+	duh->n_tags = 0;
+	duh->tag = NULL;
+
+	duh->length = dumbfile_igetl(f);
+	if (dumbfile_error(f) || duh->length <= 0)
+		goto fail;
+
+	duh->n_signals = dumbfile_igetl(f);
+	if (dumbfile_error(f) || duh->n_signals <= 0)
+		goto fail;
+
+	duh->signal = malloc(sizeof(*duh->signal) * duh->n_signals);
+	if (!duh->signal)
+		goto fail;
+
+	for (i = 0; i < duh->n_signals; i++)
+		duh->signal[i] = NULL;
+
+	for (i = 0; i < duh->n_signals; i++) {
+		if (!(duh->signal[i] = read_signal(duh, f))) {
+			unload_duh(duh);
+			return NULL;
+		}
+	}
+
+	return duh;
+
+fail:
+	free(duh);
+	return NULL;
+}
diff --git a/libraries/dumb/src/core/register.c b/libraries/dumb/src/core/register.c
new file mode 100644
index 000000000..7d7cce533
--- /dev/null
+++ b/libraries/dumb/src/core/register.c
@@ -0,0 +1,104 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * register.c - Signal type registration.             / / \  \
+ *                                                   | <  /   \_
+ * By entheh.                                        |  \/ /\   /
+ *                                                    \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+
+#include "dumb.h"
+#include "internal/dumb.h"
+
+
+
+static DUH_SIGTYPE_DESC_LINK *sigtype_desc = NULL; /* head of the list of registered signal types */
+static DUH_SIGTYPE_DESC_LINK **sigtype_desc_tail = &sigtype_desc; /* slot where the next new link is attached */
+
+
+
+/* destroy_sigtypes(): empties the list of registered signal types and frees
+ * every link in it. dumb_register_sigtype() queues this with dumb_atexit().
+ */
+static void destroy_sigtypes(void)
+{
+	DUH_SIGTYPE_DESC_LINK *link = sigtype_desc;
+	sigtype_desc = NULL;
+	sigtype_desc_tail = &sigtype_desc;
+
+	while (link != NULL) {
+		DUH_SIGTYPE_DESC_LINK *following = link->next;
+		free(link);
+		link = following;
+	}
+}
+
+
+
+/* dumb_register_sigtype(): registers a new signal type with DUMB. The signal
+ * type is identified by a four-character string (e.g. "WAVE"), which you can
+ * encode using the DUMB_ID() macro (e.g. DUMB_ID('W','A','V','E')). The
+ * signal's behaviour is defined by the functions whose pointers are held in
+ * the descriptor you pass here. See the documentation for details.
+ *
+ * If a DUH tries to use a signal that has not been registered using this
+ * function, then the library will fail to load the DUH.
+ */
+void DUMBEXPORT dumb_register_sigtype(DUH_SIGTYPE_DESC *desc)
+{
+	DUH_SIGTYPE_DESC_LINK *link;
+
+	ASSERT((desc->load_sigdata && desc->unload_sigdata) || (!desc->load_sigdata && !desc->unload_sigdata));
+	ASSERT((desc->start_sigrenderer && desc->end_sigrenderer) || (!desc->start_sigrenderer && !desc->end_sigrenderer));
+	ASSERT(desc->sigrenderer_generate_samples && desc->sigrenderer_get_current_sample);
+
+	/* While the list is empty, arrange for it to be freed by dumb_exit(). */
+	if (!sigtype_desc)
+		dumb_atexit(&destroy_sigtypes);
+
+	/* Re-registering a type replaces its descriptor in place. */
+	for (link = sigtype_desc; link; link = link->next) {
+		if (link->desc->type == desc->type) {
+			link->desc = desc;
+			return;
+		}
+	}
+
+	link = malloc(sizeof(DUH_SIGTYPE_DESC_LINK));
+	*sigtype_desc_tail = link;
+	if (!link)
+		return;
+
+	link->desc = desc;
+	link->next = NULL;
+	sigtype_desc_tail = &link->next;
+}
+
+
+
+/* _dumb_get_sigtype_desc(): searches the registered signal types for one
+ * matching the parameter. If such a sigtype is found, it returns a pointer
+ * to the sigtype descriptor containing the functions that manage the signal.
+ * If none is found, it returns NULL.
+ */
+DUH_SIGTYPE_DESC *_dumb_get_sigtype_desc(int32 type)
+{
+	DUH_SIGTYPE_DESC_LINK *link;
+
+	for (link = sigtype_desc; link; link = link->next)
+		if (link->desc->type == type)
+			return link->desc;
+	return NULL;
+}
diff --git a/libraries/dumb/src/core/rendduh.c b/libraries/dumb/src/core/rendduh.c
new file mode 100644
index 000000000..71f6201d2
--- /dev/null
+++ b/libraries/dumb/src/core/rendduh.c
@@ -0,0 +1,184 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * rendduh.c - Functions for rendering a DUH into     / / \  \
+ *             an end-user sample format.            | <  /   \_
+ *                                                   |  \/ /\   /
+ * By entheh.                                         \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <limits.h>
+
+#include "dumb.h"
+#include "internal/dumb.h"
+
+
+
+/* On x86 toolchains, clamp with bit masking (FAST_MID) instead of the MID() macro. */
+#if (defined _MSC_VER) || (defined __DJGPP__) || (defined __MINGW__)
+/* TODO: detect Linux and other x86 platforms too. NOTE(review): MinGW usually defines __MINGW32__, not __MINGW__ -- confirm. */
+/* FAST_MID: clamp var to [min, max] by masking with its sign; relies on >> of a negative value filling with sign bits. */
+#define FAST_MID(var, min, max) {                  \
+	var -= (min);                                  \
+	var &= (~var) >> (sizeof(var) * CHAR_BIT - 1); \
+	var += (min);                                  \
+	var -= (max);                                  \
+	var &= var >> (sizeof(var) * CHAR_BIT - 1);    \
+	var += (max);                                  \
+}
+/* CONVERT8: round src down by 16 bits, clamp to -128..127, flip bits per signconv, store in sptr[pos] as char. */
+#define CONVERT8(src, pos, signconv) {       \
+	signed int f = (src + 0x8000) >> 16;     \
+	FAST_MID(f, -128, 127);                  \
+	((char*)sptr)[pos] = (char)f ^ signconv; \
+}
+/* CONVERT16: round src down by 8 bits, clamp to -32768..32767, flip bits per signconv, store in sptr[pos] as short. */
+#define CONVERT16(src, pos, signconv) {          \
+	signed int f = (src + 0x80) >> 8;            \
+	FAST_MID(f, -32768, 32767);                  \
+	((short*)sptr)[pos] = (short)(f ^ signconv); \
+}
+
+#else
+/* Versions of the same two macros that clamp with MID() instead. */
+#define CONVERT8(src, pos, signconv)		  \
+{											  \
+	signed int f = (src + 0x8000) >> 16;	  \
+	f = MID(-128, f, 127);					  \
+	((char *)sptr)[pos] = (char)f ^ signconv; \
+}
+
+
+
+#define CONVERT16(src, pos, signconv)			  \
+{												  \
+	signed int f = (src + 0x80) >> 8;			  \
+	f = MID(-32768, f, 32767);					  \
+	((short *)sptr)[pos] = (short)(f ^ signconv); \
+}
+
+#endif
+
+
+
+/* DEPRECATED: forwards to duh_start_sigrenderer(), always selecting signal 0. */
+DUH_SIGRENDERER *duh_start_renderer(DUH *duh, int n_channels, int32 pos)
+{
+	return duh_start_sigrenderer(duh, 0, n_channels, pos);
+}
+
+/* Renders up to size samples from sigrenderer and writes them to sptr as 8- or
+ * 16-bit integers, with the sign bit flipped if unsign is set. Returns the
+ * count reported by duh_sigrenderer_generate_samples(), or 0 if sigrenderer
+ * is NULL or the intermediate buffer cannot be allocated. */
+int32 DUMBEXPORT duh_render(
+	DUH_SIGRENDERER *sigrenderer,
+	int bits, int unsign,
+	float volume, float delta,
+	int32 size, void *sptr
+)
+{
+	sample_t **samples;
+	int n_channels;
+	int32 i, total;
+
+	ASSERT(bits == 8 || bits == 16);
+	ASSERT(sptr);
+
+	if (!sigrenderer)
+		return 0;
+
+	n_channels = duh_sigrenderer_get_n_channels(sigrenderer);
+
+	ASSERT(n_channels > 0);
+	/* This restriction will be removed when need be. At the moment, tightly
+	 * optimised loops exist for exactly one or two channels.
+	 */
+	ASSERT(n_channels <= 2);
+
+	samples = allocate_sample_buffer(n_channels, size);
+	if (!samples)
+		return 0;
+
+	dumb_silence(samples[0], n_channels * size);
+
+	size = duh_sigrenderer_generate_samples(sigrenderer, volume, delta, size, samples);
+	total = size * n_channels;
+
+	if (bits != 16) {
+		char signconv = unsign ? 0x80 : 0x00;
+
+		for (i = 0; i < total; i++) {
+			CONVERT8(samples[0][i], i, signconv);
+		}
+	} else {
+		int signconv = unsign ? 0x8000 : 0x0000;
+
+		for (i = 0; i < total; i++) {
+			CONVERT16(samples[0][i], i, signconv);
+		}
+	}
+
+	destroy_sample_buffer(samples);
+
+	return size;
+}
+
+
+
+/* DEPRECATED: forwards to duh_sigrenderer_get_n_channels(). */
+int duh_renderer_get_n_channels(DUH_SIGRENDERER *dr)
+{
+	return duh_sigrenderer_get_n_channels(dr);
+}
+
+
+
+/* DEPRECATED: forwards to duh_sigrenderer_get_position(). */
+int32 duh_renderer_get_position(DUH_SIGRENDERER *dr)
+{
+	return duh_sigrenderer_get_position(dr);
+}
+
+
+
+/* DEPRECATED: forwards to duh_end_sigrenderer(). */
+void duh_end_renderer(DUH_SIGRENDERER *dr)
+{
+	duh_end_sigrenderer(dr);
+}
+
+
+
+/* DEPRECATED: returns its argument unchanged. */
+DUH_SIGRENDERER *duh_renderer_encapsulate_sigrenderer(DUH_SIGRENDERER *sigrenderer)
+{
+	return sigrenderer;
+}
+
+
+
+/* DEPRECATED: returns its argument unchanged. */
+DUH_SIGRENDERER *duh_renderer_get_sigrenderer(DUH_SIGRENDERER *dr)
+{
+	return dr;
+}
+
+
+
+/* DEPRECATED: returns its argument unchanged. */
+DUH_SIGRENDERER *duh_renderer_decompose_to_sigrenderer(DUH_SIGRENDERER *dr)
+{
+	return dr;
+}
diff --git a/libraries/dumb/src/core/rendsig.c b/libraries/dumb/src/core/rendsig.c
new file mode 100644
index 000000000..1e6fa1f88
--- /dev/null
+++ b/libraries/dumb/src/core/rendsig.c
@@ -0,0 +1,348 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * rendsig.c - Wrappers to render samples from        / / \  \
+ *             the signals in a DUH.                 | <  /   \_
+ *                                                   |  \/ /\   /
+ * By entheh.                                         \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+
+#include "dumb.h"
+#include "internal/dumb.h"
+
+
+
+struct DUH_SIGRENDERER /* one active renderer: a signal-type descriptor plus that type's renderer state */
+{
+	DUH_SIGTYPE_DESC *desc; /* descriptor whose function pointers the wrappers in this file call through */
+
+	sigrenderer_t *sigrenderer; /* state passed to the desc callbacks; may be NULL (stored as NULL when desc has no start_sigrenderer) */
+
+	int n_channels; /* channel count given when the renderer was created */
+
+	int32 pos; /* position; advanced after every non-empty duh_sigrenderer_generate_samples() call */
+	int subpos; /* low 16 bits of the 16.16 fixed-point position accumulator */
+
+	DUH_SIGRENDERER_SAMPLE_ANALYSER_CALLBACK callback; /* optional hook run on freshly rendered samples; NULL if none */
+	void *callback_data; /* opaque first argument passed to callback */
+};
+
+
+
+DUH_SIGRENDERER *DUMBEXPORT duh_start_sigrenderer(DUH *duh, int sig, int n_channels, int32 pos)
+{
+	/* Creates a renderer for signal number `sig` of `duh`, starting at `pos`; returns NULL on any failure. */
+	DUH_SIGRENDERER *sigrenderer;
+	DUH_SIGNAL *signal;
+	DUH_START_SIGRENDERER proc;
+
+	/* [RH] Mono destination mixers are disabled. */
+	if (n_channels != 2)
+		return NULL;
+
+	if (!duh)
+		return NULL;
+	/* The unsigned comparison rejects negative indices as well as too-large ones. */
+	if ((unsigned int)sig >= (unsigned int)duh->n_signals)
+		return NULL;
+
+	signal = duh->signal[sig];
+	if (!signal)
+		return NULL;
+
+	sigrenderer = malloc(sizeof(*sigrenderer));
+	if (!sigrenderer)
+		return NULL;
+
+	sigrenderer->desc = signal->desc;
+
+	proc = sigrenderer->desc->start_sigrenderer;
+	/* The signal's slot is emptied while its start callback runs and restored right after. */
+	if (proc) {
+		duh->signal[sig] = NULL;
+		sigrenderer->sigrenderer = (*proc)(duh, signal->sigdata, n_channels, pos);
+		duh->signal[sig] = signal;
+
+		if (!sigrenderer->sigrenderer) {
+			free(sigrenderer);
+			return NULL;
+		}
+	} else
+		sigrenderer->sigrenderer = NULL;
+
+	sigrenderer->n_channels = n_channels;
+
+	sigrenderer->pos = pos;
+	sigrenderer->subpos = 0;
+	/* No analyser callback yet; its data pointer is cleared too so no field is left uninitialised. */
+	sigrenderer->callback = NULL;
+	sigrenderer->callback_data = NULL;
+	return sigrenderer;
+}
+
+
+#ifdef DUMB_DECLARE_DEPRECATED
+#include <stdio.h>
+void duh_sigrenderer_set_callback( /* deprecated stub: accepts its arguments and installs nothing */
+	DUH_SIGRENDERER *sigrenderer,
+	DUH_SIGRENDERER_CALLBACK callback, void *data
+)
+{
+	(void)sigrenderer; /* the casts to void only mark the parameters as deliberately unused */
+	(void)callback;
+	(void)data;
+	/*fprintf(stderr,
+		"Call to deprecated function duh_sigrenderer_set_callback(). The callback\n"
+		"was not installed. See dumb/docs/deprec.txt for how to fix this.\n");*/
+}
+
+
+
+void duh_sigrenderer_set_analyser_callback( /* deprecated stub: installs nothing and says so on stderr */
+	DUH_SIGRENDERER *sigrenderer,
+	DUH_SIGRENDERER_ANALYSER_CALLBACK callback, void *data
+)
+{
+	(void)sigrenderer; /* the casts to void only mark the parameters as deliberately unused */
+	(void)callback;
+	(void)data;
+	fprintf(stderr, /* unlike duh_sigrenderer_set_callback(), this warning is still active */
+		"Call to deprecated function duh_sigrenderer_set_analyser_callback(). The\n"
+		"callback was not installed. See dumb/docs/deprec.txt for how to fix this.\n");
+}
+#endif
+
+
+void duh_sigrenderer_set_sample_analyser_callback(
+	DUH_SIGRENDERER *sigrenderer,
+	DUH_SIGRENDERER_SAMPLE_ANALYSER_CALLBACK callback, void *data
+)
+{
+	/* Install the callback together with its context pointer; a NULL renderer is ignored. */
+	if (!sigrenderer) return;
+	sigrenderer->callback_data = data;
+	sigrenderer->callback = callback;
+}
+
+/* Report the renderer's channel count; a NULL renderer reports 0. */
+int DUMBEXPORT duh_sigrenderer_get_n_channels(DUH_SIGRENDERER *sigrenderer)
+{
+	if (!sigrenderer) return 0;
+	return sigrenderer->n_channels;
+}
+
+/* Report the renderer's current position; a NULL renderer reports -1. */
+int32 DUMBEXPORT duh_sigrenderer_get_position(DUH_SIGRENDERER *sigrenderer)
+{
+	if (!sigrenderer) return -1;
+	return sigrenderer->pos;
+}
+
+
+
+void DUMBEXPORT duh_sigrenderer_set_sigparam(
+	DUH_SIGRENDERER *sigrenderer,
+	unsigned char id, int32 value
+)
+{
+	DUH_SIGRENDERER_SET_SIGPARAM set_param;
+
+	/* Nothing to configure without a renderer. */
+	if (!sigrenderer) return;
+
+	/* Hand the parameter to the signal type's handler when it has one. */
+	set_param = sigrenderer->desc->sigrenderer_set_sigparam;
+	if (set_param) {
+		(*set_param)(sigrenderer->sigrenderer, id, value);
+		return;
+	}
+	/* No handler: trace the parameter along with the four bytes of the type tag. */
+	TRACE("Parameter #%d = %d for signal %c%c%c%c, which does not take parameters.\n",
+		(int)id, value, (int)(sigrenderer->desc->type >> 24), (int)(sigrenderer->desc->type >> 16),
+		(int)(sigrenderer->desc->type >> 8), (int)(sigrenderer->desc->type));
+}
+
+
+
+int32 DUMBEXPORT duh_sigrenderer_generate_samples(
+	DUH_SIGRENDERER *sigrenderer,
+	double volume, double delta,
+	int32 size, sample_t **samples
+)
+{
+	LONG_LONG advance;
+	int32 n_done;
+
+	if (!sigrenderer) return 0;
+
+	/* Let the signal type's generator fill the buffers. */
+	n_done = (*sigrenderer->desc->sigrenderer_generate_samples)
+				(sigrenderer->sigrenderer, volume, delta, size, samples);
+	if (!n_done) return n_done;
+
+	/* Show the fresh samples to the analyser callback, if one is installed. */
+	if (sigrenderer->callback)
+		(*sigrenderer->callback)(sigrenderer->callback_data,
+			(const sample_t *const *)samples, sigrenderer->n_channels, n_done);
+
+	/* Advance the position in 16.16 fixed point: whole part into pos, low 16 bits into subpos. */
+	advance = sigrenderer->subpos + (LONG_LONG)(delta * 65536.0 + 0.5) * n_done;
+	sigrenderer->pos += (int32)(advance >> 16);
+	sigrenderer->subpos = (int)advance & 65535;
+	return n_done;
+}
+
+
+
+/* DEPRECATED: renders into a scratch buffer, then adds the result onto samples[channel][i]; returns the count rendered. */
+int32 duh_sigrenderer_get_samples(
+	DUH_SIGRENDERER *sigrenderer,
+	float volume, float delta,
+	int32 size, sample_t **samples
+)
+{
+	sample_t **s;
+	int32 rendered, i;
+	int j;
+	if (!sigrenderer) return 0; /* n_channels is read below, so a NULL renderer must stop here */
+	if (!samples) return duh_sigrenderer_generate_samples(sigrenderer, volume, delta, size, NULL);
+	s = allocate_sample_buffer(sigrenderer->n_channels, size);
+	if (!s) return 0;
+	dumb_silence(s[0], sigrenderer->n_channels * size);
+	rendered = duh_sigrenderer_generate_samples(sigrenderer, volume, delta, size, s);
+	for (j = 0; j < sigrenderer->n_channels; j++) /* s[0] is read as interleaved: frame i, channel j */
+		for (i = 0; i < rendered; i++)
+			samples[j][i] += s[0][i*sigrenderer->n_channels+j];
+	destroy_sample_buffer(s);
+	return rendered;
+}
+
+
+
+/* DEPRECATED: like duh_sigrenderer_get_samples(), but each rendered value is shifted right by 8 before it is added. */
+int32 duh_render_signal(
+	DUH_SIGRENDERER *sigrenderer,
+	float volume, float delta,
+	int32 size, sample_t **samples
+)
+{
+	sample_t **s;
+	int32 rendered, i;
+	int j;
+	if (!sigrenderer) return 0; /* n_channels is read below, so a NULL renderer must stop here */
+	if (!samples) return duh_sigrenderer_generate_samples(sigrenderer, volume, delta, size, NULL);
+	s = allocate_sample_buffer(sigrenderer->n_channels, size);
+	if (!s) return 0;
+	dumb_silence(s[0], sigrenderer->n_channels * size);
+	rendered = duh_sigrenderer_generate_samples(sigrenderer, volume, delta, size, s);
+	for (j = 0; j < sigrenderer->n_channels; j++) /* s[0] is read as interleaved: frame i, channel j */
+		for (i = 0; i < rendered; i++)
+			samples[j][i] += s[0][i*sigrenderer->n_channels+j] >> 8;
+	destroy_sample_buffer(s);
+	return rendered;
+}
+
+
+
+void DUMBEXPORT duh_sigrenderer_get_current_sample(DUH_SIGRENDERER *sigrenderer, float volume, sample_t *samples)
+{
+	if (!sigrenderer) return; /* nothing to query without a renderer */
+	(*sigrenderer->desc->sigrenderer_get_current_sample)(sigrenderer->sigrenderer, volume, samples);
+}
+
+
+
+void DUMBEXPORT duh_end_sigrenderer(DUH_SIGRENDERER *sigrenderer)
+{
+	if (!sigrenderer) return;
+
+	/* Let the signal type tear down its own state first, when it has both a hook and state. */
+	if (sigrenderer->desc->end_sigrenderer && sigrenderer->sigrenderer)
+		(*sigrenderer->desc->end_sigrenderer)(sigrenderer->sigrenderer);
+
+	free(sigrenderer);
+}
+
+
+
+DUH_SIGRENDERER *DUMBEXPORT duh_encapsulate_raw_sigrenderer(sigrenderer_t *vsigrenderer, DUH_SIGTYPE_DESC *desc, int n_channels, int32 pos)
+{
+	DUH_SIGRENDERER *sigrenderer;
+	/* Wraps an already-created raw renderer. A type that has a start function must come with a renderer. */
+	if (desc->start_sigrenderer && !vsigrenderer) return NULL;
+
+	sigrenderer = malloc(sizeof(*sigrenderer));
+	if (!sigrenderer) {
+		/* On allocation failure the raw renderer is ended here; NULL is returned to the caller. */
+		if (desc->end_sigrenderer && vsigrenderer)
+			(*desc->end_sigrenderer)(vsigrenderer);
+		return NULL;
+	}
+
+	sigrenderer->desc = desc;
+	sigrenderer->sigrenderer = vsigrenderer;
+
+	sigrenderer->n_channels = n_channels;
+
+	sigrenderer->pos = pos;
+	sigrenderer->subpos = 0;
+	/* No analyser callback yet; its data pointer is cleared too so no field is left uninitialised. */
+	sigrenderer->callback = NULL;
+	sigrenderer->callback_data = NULL;
+	return sigrenderer;
+}
+
+
+
+sigrenderer_t *DUMBEXPORT duh_get_raw_sigrenderer(DUH_SIGRENDERER *sigrenderer, int32 type)
+{
+	/* The raw renderer is only handed out when the caller names its actual type. */
+	if (!sigrenderer || sigrenderer->desc->type != type)
+		return NULL;
+	return sigrenderer->sigrenderer;
+}
+
+
+
+#if 0
+// This function is disabled because we don't know whether we want to destroy
+// the sigrenderer if the type doesn't match. We don't even know if we need
+// the function at all. Who would want to keep an IT_SIGRENDERER (for
+// instance) without keeping the DUH_SIGRENDERER?
+sigrenderer_t *duh_decompose_to_raw_sigrenderer(DUH_SIGRENDERER *sigrenderer, int32 type)
+{
+	if (sigrenderer && sigrenderer->desc->type == type) {
+
+
+
+	if (sigrenderer) {
+		if (sigrenderer->desc->end_sigrenderer)
+			if (sigrenderer->sigrenderer)
+				(*sigrenderer->desc->end_sigrenderer)(sigrenderer->sigrenderer);
+
+		free(sigrenderer);
+	}
+
+
+
+
+		// NOTE(review): the block above has already ended the raw renderer and freed
+		// sigrenderer, so this return would read freed memory if the code were re-enabled.
+		return sigrenderer->sigrenderer;
+	}
+
+	return NULL;
+}
+#endif
diff --git a/libraries/dumb/src/core/unload.c b/libraries/dumb/src/core/unload.c
new file mode 100644
index 000000000..6495ab1f4
--- /dev/null
+++ b/libraries/dumb/src/core/unload.c
@@ -0,0 +1,64 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * unload.c - Code to free a DUH from memory.         / / \  \
+ *                                                   | <  /   \_
+ * By entheh.                                        |  \/ /\   /
+ *                                                    \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+
+#include "dumb.h"
+#include "internal/dumb.h"
+
+
+
+/* Frees one DUH_SIGNAL, first letting its type unload the signal's private data. */
+static void destroy_signal(DUH_SIGNAL *signal)
+{
+	if (!signal) return;
+
+	/* The unload hook only runs when the descriptor, the hook and the data all exist. */
+	if (signal->desc && signal->desc->unload_sigdata && signal->sigdata)
+		(*signal->desc->unload_sigdata)(signal->sigdata);
+
+	free(signal);
+}
+
+
+
+/* unload_duh(): releases a DUH together with its signals and tags. Call it
+ * once for every DUH you created, after you have finished using it.
+ */
+void DUMBEXPORT unload_duh(DUH *duh)
+{
+	int sig;
+
+	if (!duh) return;
+
+	if (duh->signal) {
+		for (sig = 0; sig < duh->n_signals; sig++)
+			destroy_signal(duh->signal[sig]);
+		free(duh->signal);
+	}
+
+	if (duh->tag) {
+		/* Only tag[0][0] is freed individually before the tag array itself goes. */
+		if (duh->tag[0][0])
+			free(duh->tag[0][0]);
+		free(duh->tag);
+	}
+
+	free(duh);
+}
diff --git a/libraries/dumb/src/helpers/barray.c b/libraries/dumb/src/helpers/barray.c
new file mode 100644
index 000000000..71e8dc352
--- /dev/null
+++ b/libraries/dumb/src/helpers/barray.c
@@ -0,0 +1,189 @@
+#include "internal/barray.h"
+
+#include <string.h>
+
+
+void * bit_array_create(size_t size)
+{
+	/* Layout: a size_t bit count followed by ceil(size / 8) zeroed payload bytes. */
+	size_t * header = calloc(1, sizeof(size_t) + ((size + 7) >> 3));
+	if (header) *header = size;
+	return header;
+}
+
+void bit_array_destroy(void * array)
+{
+	free(array); /* free(NULL) is a no-op, so a NULL array needs no special case */
+}
+
+void * bit_array_dup(void * array)
+{
+	size_t total;
+	void * copy;
+	if (!array) return NULL;
+
+	/* Header plus payload: sizeof(size_t) + ceil(bits / 8) bytes are copied verbatim. */
+	total = ((*(size_t *) array + 7) >> 3) + sizeof(size_t);
+	copy = malloc(total);
+	if (copy) memcpy(copy, array, total);
+	return copy;
+}
+
+void bit_array_reset(void * array)
+{
+	size_t * header = (size_t *) array;
+
+	if (!header) return;
+
+	/* Zero the payload bytes only; the bit count in the header is kept. */
+	memset(header + 1, 0, (*header + 7) >> 3);
+}
+
+
+void bit_array_set(void * array, size_t bit)
+{
+	size_t * header = (size_t *) array;
+	unsigned char * bytes;
+
+	/* Ignore a NULL array and any index at or beyond the stored bit count. */
+	if (!header || bit >= *header) return;
+
+	/* The payload starts right after the size_t header; bit 0 is the LSB of byte 0. */
+	bytes = (unsigned char *)(header + 1);
+	bytes[bit >> 3] |= (1U << (bit & 7));
+}
+
+void bit_array_set_range(void * array, size_t bit, size_t count)
+{
+    size_t * size = (size_t *) array;
+    unsigned char * ptr;
+    size_t i, end;
+    /* Sets bits [bit, bit + count), clipped to the array; NULL array, empty range or start past the end: no-op. */
+    if (!array || !count || bit >= *size) return;
+
+    /* Clip to the array end without forming bit + count, which could wrap around size_t. */
+    end = (count < *size - bit) ? bit + count : *size;
+    ptr = (unsigned char *)(size + 1);
+    for (i = bit; i < end; ++i)
+        ptr[i >> 3] |= (1U << (i & 7));
+}
+
+int bit_array_test(void * array, size_t bit)
+{
+	size_t * header = (size_t *) array;
+	unsigned char * bytes;
+	unsigned int mask;
+
+	/* A NULL array or an out-of-range index reads as "not set". */
+	if (!header || bit >= *header)
+		return 0;
+
+	/* The payload follows the size_t header; bit N lives in byte N / 8. */
+	bytes = (unsigned char *)(header + 1);
+	mask = 1U << (bit & 7);
+
+	return (bytes[bit >> 3] & mask) ? 1 : 0;
+}
+
+int bit_array_test_range(void * array, size_t bit, size_t count) /* 1 if any bit of [bit, bit + count) inside the array is set, else 0 */
+{
+	if (array)
+	{
+		size_t * size = (size_t *) array;
+		if (bit < *size)
+		{
+			unsigned char * ptr = (unsigned char *)(size + 1);
+			if ((bit & 7) && (count > 8)) /* longer range with an unaligned start: */
+			{
+				while ((bit < *size) && count && (bit & 7)) /* test single bits up to the next byte boundary */
+				{
+					if (ptr[bit >> 3] & (1U << (bit & 7))) return 1;
+					bit++;
+					count--;
+				}
+			}
+			if (!(bit & 7)) /* byte-aligned start: */
+			{
+				while (((*size - bit) >= 8) && (count >= 8)) /* test eight bits at once through a whole-byte check */
+				{
+					if (ptr[bit >> 3]) return 1;
+					bit += 8;
+					count -= 8;
+				}
+			}
+			while ((bit < *size) && count) /* whatever remains, one bit at a time */
+			{
+				if (ptr[bit >> 3] & (1U << (bit & 7))) return 1;
+				bit++;
+				count--;
+			}
+		}
+	}
+	return 0; /* also the answer for a NULL array or a start index past the end */
+}
+
+void bit_array_clear(void * array, size_t bit)
+{
+	size_t * header = (size_t *) array;
+	unsigned char * bytes;
+
+	/* Ignore a NULL array and any index at or beyond the stored bit count. */
+	if (!header || bit >= *header) return;
+
+	/* Mask the bit out of its byte; the other seven bits are kept. */
+	bytes = (unsigned char *)(header + 1);
+	bytes[bit >> 3] &= ~(1U << (bit & 7));
+}
+
+void bit_array_clear_range(void * array, size_t bit, size_t count)
+{
+    size_t * size = (size_t *) array;
+    unsigned char * ptr;
+    size_t i, end;
+    /* Clears bits [bit, bit + count), clipped to the array; NULL array, empty range or start past the end: no-op. */
+    if (!array || !count || bit >= *size) return;
+
+    /* Clip to the array end without forming bit + count, which could wrap around size_t. */
+    end = (count < *size - bit) ? bit + count : *size;
+    ptr = (unsigned char *)(size + 1);
+    for (i = bit; i < end; ++i)
+        ptr[i >> 3] &= ~(1U << (i & 7));
+}
+
+void bit_array_merge(void * dest, void * source, size_t offset)
+{
+	size_t dest_bits, source_bits, s;
+
+	/* Either array missing: nothing to do. */
+	if (!dest || !source) return;
+
+	dest_bits = *(size_t *) dest;
+	source_bits = *(size_t *) source;
+
+	/* Source bit s maps onto dest bit offset + s; every set source bit is set
+	 * in dest, and the walk stops at whichever array ends first. */
+	for (s = 0; offset < dest_bits && s < source_bits; s++, offset++)
+	{
+		if (bit_array_test(source, s))
+			bit_array_set(dest, offset);
+	}
+}
+
+void bit_array_mask(void * dest, void * source, size_t offset)
+{
+	size_t dest_bits, source_bits, s;
+
+	/* Either array missing: nothing to do. */
+	if (!dest || !source) return;
+
+	dest_bits = *(size_t *) dest;
+	source_bits = *(size_t *) source;
+
+	/* Source bit s maps onto dest bit offset + s; every set source bit is
+	 * cleared in dest, and the walk stops at whichever array ends first. */
+	for (s = 0; offset < dest_bits && s < source_bits; s++, offset++)
+	{
+		if (bit_array_test(source, s))
+			bit_array_clear(dest, offset);
+	}
+}
diff --git a/libraries/dumb/src/helpers/clickrem.c b/libraries/dumb/src/helpers/clickrem.c
new file mode 100644
index 000000000..e1db4a663
--- /dev/null
+++ b/libraries/dumb/src/helpers/clickrem.c
@@ -0,0 +1,306 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * clickrem.c - Click removal helpers.                / / \  \
+ *                                                   | <  /   \_
+ * By entheh.                                        |  \/ /\   /
+ *                                                    \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <math.h>
+#include "dumb.h"
+
+
+
+typedef struct DUMB_CLICK DUMB_CLICK;
+
+
+struct DUMB_CLICK_REMOVER
+{
+	DUMB_CLICK *click; /* singly linked list of recorded clicks not yet applied; new clicks are pushed at the head */
+	int n_clicks; /* number of nodes on the click list */
+
+	int offset; /* current correction; dumb_remove_clicks() adds it to the samples and decays it */
+
+	DUMB_CLICK *free_clicks; /* spare nodes kept for reuse by alloc_click() */
+};
+
+
+struct DUMB_CLICK
+{
+	DUMB_CLICK *next; /* next node in whichever list (pending or free) currently holds this click */
+	int32 pos; /* position of the click; non-zero for queued nodes, since position 0 is never queued */
+	sample_t step; /* amount subtracted from the remover's offset once the click is reached */
+};
+
+
+static DUMB_CLICK *alloc_click(DUMB_CLICK_REMOVER *cr)
+{
+	DUMB_CLICK *node = cr->free_clicks;
+	/* No recycled node available: fall back to the heap (the result may be NULL). */
+	if (node == NULL)
+		return malloc(sizeof(DUMB_CLICK));
+
+	cr->free_clicks = node->next;
+	return node;
+}
+
+static void free_click(DUMB_CLICK_REMOVER *cr, DUMB_CLICK *cl) /* recycles a node instead of freeing it */
+{
+	cl->next = cr->free_clicks; /* push cl onto the front of the remover's free list ... */
+	cr->free_clicks = cl; /* ... where alloc_click() will pick it up again */
+}
+
+DUMB_CLICK_REMOVER *DUMBEXPORT dumb_create_click_remover(void)
+{
+	DUMB_CLICK_REMOVER *remover;
+	remover = malloc(sizeof(*remover));
+	if (remover) {
+		/* Start with no pending clicks, no recycled nodes and a zero offset. */
+		remover->click = NULL;
+		remover->free_clicks = NULL;
+		remover->n_clicks = 0;
+		remover->offset = 0;
+	}
+	return remover;
+}
+
+
+
+void DUMBEXPORT dumb_record_click(DUMB_CLICK_REMOVER *cr, int32 pos, sample_t step)
+{
+	DUMB_CLICK *node;
+
+	ASSERT(pos >= 0);
+
+	/* A missing remover or a zero step leaves nothing to record. */
+	if (!cr || !step) return;
+
+	if (pos != 0) {
+		/* Queue the click at the head of the list; dumb_remove_clicks() sorts and consumes it. */
+		node = alloc_click(cr);
+		if (!node) return;
+		node->step = step;
+		node->pos = pos;
+		node->next = cr->click;
+		cr->click = node;
+		cr->n_clicks++;
+	} else {
+		/* A click at position 0 is folded straight into the running offset. */
+		cr->offset -= step;
+	}
+}
+
+
+
+static DUMB_CLICK *dumb_click_mergesort(DUMB_CLICK *click, int n_clicks)
+{
+	int i;
+	DUMB_CLICK *c1, *c2, **cp;
+	/* Sorts the n_clicks-node list by ascending pos and returns its new head; 0 or 1 nodes come back as is. */
+	if (n_clicks <= 1) return click;
+
+	/* Split the list into two: cp is stepped ceil(n_clicks / 2) links forward, so c1 keeps that many nodes */
+	c1 = click;
+	cp = &c1;
+	for (i = 0; i < n_clicks; i += 2) cp = &(*cp)->next;
+	c2 = *cp;
+	*cp = NULL;
+
+	/* Sort the sublists */
+	c1 = dumb_click_mergesort(c1, (n_clicks + 1) >> 1);
+	c2 = dumb_click_mergesort(c2, n_clicks >> 1);
+
+	/* Merge them; on equal pos the node from c1 (the earlier half) goes first */
+	cp = &click;
+	while (c1 && c2) {
+		if (c1->pos > c2->pos) {
+			*cp = c2;
+			c2 = c2->next;
+		} else {
+			*cp = c1;
+			c1 = c1->next;
+		}
+		cp = &(*cp)->next;
+	}
+	if (c2)
+		*cp = c2;
+	else
+		*cp = c1;
+	/* One sublist ran out; the remainder of the other was appended whole just above. */
+	return click;
+}
+
+
+
+void DUMBEXPORT dumb_remove_clicks(DUMB_CLICK_REMOVER *cr, sample_t *samples, int32 length, int step, double halflife)
+{
+	DUMB_CLICK *click;
+	int32 pos = 0;
+	int offset;
+	int factor;
+	/* Adds the remover's decaying offset to every step-th entry of samples[], consuming the recorded clicks. */
+	if (!cr) return;
+	/* Per-sample decay multiplier pow(0.5, 1/halflife), scaled by 2^31 for the fixed-point products below. */
+	factor = (int)floor(pow(0.5, 1.0/halflife) * (1U << 31));
+
+	click = dumb_click_mergesort(cr->click, cr->n_clicks);
+	cr->click = NULL;
+	cr->n_clicks = 0;
+	/* From here on pos, end and length are indices into samples[], i.e. already multiplied by step. */
+	length *= step;
+	/* Walk the clicks in ascending position: apply the offset up to each click, then fold the click in. */
+	while (click) {
+		DUMB_CLICK *next = click->next;
+		int end = click->pos * step;
+		ASSERT(end <= length);
+		offset = cr->offset;
+		if (offset < 0) {
+			offset = -offset;
+			while (pos < end) {
+				samples[pos] -= offset;
+				offset = (int)((((LONG_LONG)offset << 1) * factor) >> 32); /* widen first: offset << 1 could overflow int */
+				pos += step;
+			}
+			offset = -offset;
+		} else {
+			while (pos < end) {
+				samples[pos] += offset;
+				offset = (int)((((LONG_LONG)offset << 1) * factor) >> 32); /* widen first: offset << 1 could overflow int */
+				pos += step;
+			}
+		}
+		cr->offset = offset - click->step;
+		free_click(cr, click);
+		click = next;
+	}
+	/* Carry on to the end of the buffer with whatever offset remains, and store it for the next call. */
+	offset = cr->offset;
+	if (offset < 0) {
+		offset = -offset;
+		while (pos < length) {
+			samples[pos] -= offset;
+			offset = (int)((((LONG_LONG)offset << 1) * factor) >> 32); /* widen first: offset << 1 could overflow int */
+			pos += step;
+		}
+		offset = -offset;
+	} else {
+		while (pos < length) {
+			samples[pos] += offset;
+			offset = (int)((((LONG_LONG)offset << 1) * factor) >> 32); /* widen first: offset << 1 could overflow int */
+			pos += step;
+		}
+	}
+	cr->offset = offset;
+}
+
+/* Report the remover's current offset; a NULL remover reports 0. */
+sample_t DUMBEXPORT dumb_click_remover_get_offset(DUMB_CLICK_REMOVER *cr)
+{
+	if (!cr) return 0;
+	return cr->offset;
+}
+
+
+
+void DUMBEXPORT dumb_destroy_click_remover(DUMB_CLICK_REMOVER *cr)
+{
+	DUMB_CLICK *node, *following;
+
+	if (!cr) return;
+
+	/* Release the clicks that are still pending ... */
+	for (node = cr->click; node; node = following) {
+		following = node->next;
+		free(node);
+	}
+	/* ... then the recycled nodes kept for reuse, and finally the remover itself. */
+	for (node = cr->free_clicks; node; node = following) {
+		following = node->next;
+		free(node);
+	}
+	free(cr);
+}
+
+
+
+DUMB_CLICK_REMOVER **DUMBEXPORT dumb_create_click_remover_array(int n)
+{
+	DUMB_CLICK_REMOVER **removers = NULL;
+	int idx = 0;
+	if (n > 0) removers = malloc(n * sizeof(*removers));
+	/* Individual entries end up NULL if their own allocation fails; that is not treated as an error here. */
+	while (removers && idx < n)
+		removers[idx++] = dumb_create_click_remover();
+	return removers;
+}
+
+
+
+void DUMBEXPORT dumb_record_click_array(int n, DUMB_CLICK_REMOVER **cr, int32 pos, sample_t *step)
+{
+	int ch;
+	if (!cr) return;
+	/* Record step[ch] at `pos` on each of the n removers. */
+	for (ch = 0; ch < n; ch++)
+		dumb_record_click(cr[ch], pos, step[ch]);
+}
+
+
+
+void DUMBEXPORT dumb_record_click_negative_array(int n, DUMB_CLICK_REMOVER **cr, int32 pos, sample_t *step)
+{
+	int ch;
+	if (!cr) return;
+	/* Record the negated step[ch] at `pos` on each of the n removers. */
+	for (ch = 0; ch < n; ch++)
+		dumb_record_click(cr[ch], pos, -step[ch]);
+}
+
+
+
+void DUMBEXPORT dumb_remove_clicks_array(int n, DUMB_CLICK_REMOVER **cr, sample_t **samples, int32 length, double halflife)
+{
+	int pair;
+	if (!cr) return;
+	/* Removers are taken two per buffer: both use stride 2, the second one starting one entry in. */
+	for (pair = 0; pair < n >> 1; pair++) {
+		dumb_remove_clicks(cr[2 * pair], samples[pair], length, 2, halflife);
+		dumb_remove_clicks(cr[2 * pair + 1], samples[pair] + 1, length, 2, halflife);
+	}
+	/* An odd count leaves one last remover, which gets the next buffer to itself with stride 1. */
+	if (n & 1) dumb_remove_clicks(cr[2 * pair], samples[pair], length, 1, halflife);
+}
+
+
+
+void DUMBEXPORT dumb_click_remover_get_offset_array(int n, DUMB_CLICK_REMOVER **cr, sample_t *offset)
+{
+	int ch;
+	if (!cr) return;
+	/* Accumulates (+=) into offset[]; entries whose remover is NULL are left untouched. */
+	for (ch = 0; ch < n; ch++)
+		if (cr[ch]) offset[ch] += cr[ch]->offset;
+}
+
+
+
+void DUMBEXPORT dumb_destroy_click_remover_array(int n, DUMB_CLICK_REMOVER **cr)
+{
+	int idx;
+	if (!cr) return;
+	/* Destroy each remover (NULL entries are accepted by the callee), then the array itself. */
+	for (idx = 0; idx < n; idx++) dumb_destroy_click_remover(cr[idx]);
+	free(cr);
+}
diff --git a/libraries/dumb/src/helpers/lpc.c b/libraries/dumb/src/helpers/lpc.c
new file mode 100644
index 000000000..c77516892
--- /dev/null
+++ b/libraries/dumb/src/helpers/lpc.c
@@ -0,0 +1,320 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function: LPC low level routines
+  last mod: $Id: lpc.c 16227 2009-07-08 06:58:46Z xiphmont $
+
+ ********************************************************************/
+
+/* Some of these routines (autocorrelator, LPC coefficient estimator)
+   are derived from code written by Jutta Degener and Carsten Bormann;
+   thus we include their copyright below.  The entirety of this file
+   is freely redistributable on the condition that both of these
+   copyright notices are preserved without modification.  */
+
+/* Preserved Copyright: *********************************************/
+
+/* Copyright 1992, 1993, 1994 by Jutta Degener and Carsten Bormann,
+Technische Universita"t Berlin
+
+Any use of this software is permitted provided that this notice is not
+removed and that neither the authors nor the Technische Universita"t
+Berlin are deemed to have made any representations as to the
+suitability of this software for any purpose nor are held responsible
+for any defects of this software. THERE IS ABSOLUTELY NO WARRANTY FOR
+THIS SOFTWARE.
+
+As a matter of courtesy, the authors request to be informed about uses
+this software has found, about bugs in this software, and about any
+improvements that may be of general interest.
+
+Berlin, 28.11.1994
+Jutta Degener
+Carsten Bormann
+
+*********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "internal/stack_alloc.h"
+#include "internal/lpc.h"
+
+/* Autocorrelation LPC coeff generation algorithm invented by
+   N. Levinson in 1947, modified by J. Durbin in 1959. */
+
+/* Input : n elements of time domain data
+   Output: m lpc coefficients, excitation energy */
+
+float vorbis_lpc_from_data(float *data,float *lpci,int n,int m){
+  double *aut=alloca(sizeof(*aut)*(m+1));
+  double *lpc=alloca(sizeof(*lpc)*(m));
+  double error;
+  double epsilon;
+  int i,j;
+  /* aut[] and lpc[] are stack scratch (alloca); lpci[0..m-1] receives the result and the final error is returned. */
+  /* autocorrelation, p+1 lag coefficients */
+  j=m+1;
+  while(j--){
+    double d=0; /* double needed for accumulator depth */
+    for(i=j;i<n;i++)d+=(double)data[i]*data[(i-j)];
+    aut[j]=d;
+  }
+
+  /* Generate lpc coefficients from autocorr values */
+
+  /* set our noise floor to about -100dB */
+  error=aut[0] * (1. + 1e-10);
+  epsilon=1e-9*aut[0]+1e-10;
+
+  for(i=0;i<m;i++){
+    double r= -aut[i+1];
+    /* Error has dropped below the noise floor: zero the remaining coefficients and stop iterating. */
+    if(error<epsilon){
+      memset(lpc+i,0,(m-i)*sizeof(*lpc));
+      goto done;
+    }
+
+    /* Sum up this iteration's reflection coefficient; note that in
+       Vorbis we don't save it.  If anyone wants to recycle this code
+       and needs reflection coefficients, save the results of 'r' from
+       each iteration. */
+
+    for(j=0;j<i;j++)r-=lpc[j]*aut[i-j];
+    r/=error;
+
+    /* Update LPC coefficients and total error */
+
+    lpc[i]=r;
+    for(j=0;j<i/2;j++){
+      double tmp=lpc[j];
+
+      lpc[j]+=r*lpc[i-1-j];
+      lpc[i-1-j]+=r*tmp;
+    }
+    if(i&1)lpc[j]+=lpc[j]*r; /* odd i: the middle coefficient has no mirror partner in the loop above */
+
+    error*=1.-r*r;
+
+  }
+
+ done:
+
+  /* slightly damp the filter */
+  {
+    double g = .99;
+    double damp = g;
+    for(j=0;j<m;j++){
+      lpc[j]*=damp;
+      damp*=g;
+    }
+  }
+  /* Hand the coefficients back in single precision. */
+  for(j=0;j<m;j++)lpci[j]=(float)lpc[j];
+
+  /* we need the error value to know how big an impulse to hit the
+     filter with later */
+
+  return (float)error;
+}
+
+void vorbis_lpc_predict(float *coeff,float *prime,int m,
+                     float *data,long n){
+
+  /* in: coeff[0...m-1] LPC coefficients
+         prime[0...m-1] initial values (allocated size of n+m-1)
+    out: data[0...n-1] data samples */
+
+  long i,j,o,p;
+  float y;
+  float *work=alloca(sizeof(*work)*(m+n)); /* stack scratch: m history values followed by the n outputs */
+  /* Seed the history with prime[], or with zeros when no prime is given. */
+  if(!prime)
+    for(i=0;i<m;i++)
+      work[i]=0.f;
+  else
+    for(i=0;i<m;i++)
+      work[i]=prime[i];
+  /* Each output is minus the sum of the previous m values weighted by coeff[] taken in reverse order. */
+  for(i=0;i<n;i++){
+    y=0;
+    o=i;
+    p=m;
+    for(j=0;j<m;j++)
+      y-=work[o++]*coeff[--p];
+
+    data[i]=work[o]=y; /* o == i + m here, so the new value also becomes history for later outputs */
+  }
+}
+
+#include "dumb.h"
+#include "internal/dumb.h"
+#include "internal/it.h"
+
+enum { lpc_max   = 256 }; /* Maximum number of input samples to train the function */
+enum { lpc_order = 32  }; /* Order of the filter */
+enum { lpc_extra = 64  }; /* How many samples of padding to predict or silence */
+
+
+/* Appends lpc_extra frames of padding to every existing, non-looping sample: LPC-extrapolated when the sample has at
+ * least lpc_order frames, silence otherwise. Only the FIR resampler really needs it, but it helps the others as well. */
+void dumb_it_add_lpc(struct DUMB_IT_SIGDATA *sigdata){
+    float lpc[lpc_order * 2];
+    float lpc_input[lpc_max * 2];
+    float lpc_output[lpc_extra * 2];
+    void * padded; /* realloc result on the silence path, checked before it replaces sample->data */
+    signed char * s8;
+    signed short * s16;
+
+    int n, o, offset, lpc_samples, bps;
+    /* For stereo data the second halves of lpc, lpc_input and lpc_output hold the second channel. */
+    for ( n = 0; n < sigdata->n_samples; n++ ) {
+        IT_SAMPLE * sample = sigdata->sample + n;
+        if ( ( sample->flags & ( IT_SAMPLE_EXISTS | IT_SAMPLE_LOOP) ) == IT_SAMPLE_EXISTS ) {
+            /* If we have enough sample data to train the filter, use the filter to generate the padding */
+            if ( sample->length >= lpc_order ) {
+                lpc_samples = sample->length;
+                if (lpc_samples > lpc_max) lpc_samples = lpc_max;
+                offset = sample->length - lpc_samples;
+
+                if ( sample->flags & IT_SAMPLE_STEREO )
+                {
+                    if ( sample->flags & IT_SAMPLE_16BIT )
+                    {
+                        s16 = ( signed short * ) sample->data;
+                        s16 += offset * 2;
+                        for ( o = 0; o < lpc_samples; o++ )
+                        {
+                            lpc_input[ o ] = s16[ o * 2 + 0 ];
+                            lpc_input[ o + lpc_max ] = s16[ o * 2 + 1 ];
+                        }
+                    }
+                    else
+                    {
+                        s8 = ( signed char * ) sample->data;
+                        s8 += offset * 2;
+                        for ( o = 0; o < lpc_samples; o++ )
+                        {
+                            lpc_input[ o ] = s8[ o * 2 + 0 ];
+                            lpc_input[ o + lpc_max ] = s8[ o * 2 + 1 ];
+                        }
+                    }
+
+                    vorbis_lpc_from_data( lpc_input, lpc, lpc_samples, lpc_order );
+                    vorbis_lpc_from_data( lpc_input + lpc_max, lpc + lpc_order, lpc_samples, lpc_order );
+
+                    vorbis_lpc_predict( lpc, lpc_input + lpc_samples - lpc_order, lpc_order, lpc_output, lpc_extra );
+                    vorbis_lpc_predict( lpc + lpc_order, lpc_input + lpc_max + lpc_samples - lpc_order, lpc_order, lpc_output + lpc_extra, lpc_extra );
+
+                    if ( sample->flags & IT_SAMPLE_16BIT )
+                    {
+                        s16 = ( signed short * ) realloc( sample->data, ( sample->length + lpc_extra ) * 2 * sizeof(short) );
+                        if ( !s16 ) continue; /* out of memory: keep the old buffer and length, sample stays unpadded */
+                        sample->data = s16;
+                        s16 += sample->length * 2;
+                        sample->length += lpc_extra;
+
+                        for ( o = 0; o < lpc_extra; o++ )
+                        {
+                            s16[ o * 2 + 0 ] = (signed short)lpc_output[ o ];
+                            s16[ o * 2 + 1 ] = (signed short)lpc_output[ o + lpc_extra ];
+                        }
+                    }
+                    else
+                    {
+                        s8 = ( signed char * ) realloc( sample->data, ( sample->length + lpc_extra ) * 2 );
+                        if ( !s8 ) continue; /* out of memory: keep the old buffer and length, sample stays unpadded */
+                        sample->data = s8;
+                        s8 += sample->length * 2;
+                        sample->length += lpc_extra;
+
+                        for ( o = 0; o < lpc_extra; o++ )
+                        {
+                            s8[ o * 2 + 0 ] = (signed char)lpc_output[ o ];
+                            s8[ o * 2 + 1 ] = (signed char)lpc_output[ o + lpc_extra ];
+                        }
+                    }
+                }
+                else
+                {
+                    if ( sample->flags & IT_SAMPLE_16BIT )
+                    {
+                        s16 = ( signed short * ) sample->data;
+                        s16 += offset;
+                        for ( o = 0; o < lpc_samples; o++ )
+                        {
+                            lpc_input[ o ] = s16[ o ];
+                        }
+                    }
+                    else
+                    {
+                        s8 = ( signed char * ) sample->data;
+                        s8 += offset;
+                        for ( o = 0; o < lpc_samples; o++ )
+                        {
+                            lpc_input[ o ] = s8[ o ];
+                        }
+                    }
+
+                    vorbis_lpc_from_data( lpc_input, lpc, lpc_samples, lpc_order );
+
+                    vorbis_lpc_predict( lpc, lpc_input + lpc_samples - lpc_order, lpc_order, lpc_output, lpc_extra );
+
+                    if ( sample->flags & IT_SAMPLE_16BIT )
+                    {
+                        s16 = ( signed short * ) realloc( sample->data, ( sample->length + lpc_extra ) * sizeof(short) );
+                        if ( !s16 ) continue; /* out of memory: keep the old buffer and length, sample stays unpadded */
+                        sample->data = s16;
+                        s16 += sample->length;
+                        sample->length += lpc_extra;
+
+                        for ( o = 0; o < lpc_extra; o++ )
+                        {
+                            s16[ o ] = (signed short)lpc_output[ o ];
+                        }
+                    }
+                    else
+                    {
+                        s8 = ( signed char * ) realloc( sample->data, sample->length + lpc_extra );
+                        if ( !s8 ) continue; /* out of memory: keep the old buffer and length, sample stays unpadded */
+                        sample->data = s8;
+                        s8 += sample->length;
+                        sample->length += lpc_extra;
+
+                        for ( o = 0; o < lpc_extra; o++ )
+                        {
+                            s8[ o ] = (signed char)lpc_output[ o ];
+                        }
+                    }
+                }
+            }
+            else
+            /* Otherwise, pad with silence. */
+            {
+                /* Bytes per frame. This needs its own variable: n is the counter of the enclosing sample loop. */
+                bps = 1;
+                if ( sample->flags & IT_SAMPLE_STEREO ) bps *= 2;
+                if ( sample->flags & IT_SAMPLE_16BIT ) bps *= 2;
+
+                offset = sample->length * bps;
+                lpc_samples = lpc_extra * bps;
+
+                padded = realloc( sample->data, offset + lpc_samples );
+                if ( !padded ) continue; /* out of memory: keep the old buffer and length, sample stays unpadded */
+
+                memset( (char*)padded + offset, 0, lpc_samples );
+                sample->data = padded;
+                sample->length += lpc_extra;
+            }
+        }
+    }
+}
diff --git a/libraries/dumb/src/helpers/memfile.c b/libraries/dumb/src/helpers/memfile.c
new file mode 100644
index 000000000..476683944
--- /dev/null
+++ b/libraries/dumb/src/helpers/memfile.c
@@ -0,0 +1,117 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * memfile.c - Module for reading data from           / / \  \
+ *             memory using a DUMBFILE.              | <  /   \_
+ *                                                   |  \/ /\   /
+ * By entheh.                                         \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "dumb.h"
+
+
+
+typedef struct MEMFILE MEMFILE;
+/* Read cursor over an in-memory buffer (filled in by dumbfile_open_memory). */
+struct MEMFILE
+{
+	const char *ptr, *ptr_begin; /* current read position; first byte of the buffer */
+	long left, size;             /* bytes remaining from ptr; total length of the buffer */
+};
+
+
+
+static int DUMBCALLBACK dumb_memfile_skip(void *f, long n)
+{
+	MEMFILE *m = f; /* moves the read position by n bytes: 0 on success, -1 if out of range */
+	if (n > m->left || n < m->left - m->size) return -1; /* past the end, or back before ptr_begin */
+	m->ptr += n;
+	m->left -= n;
+	return 0;
+}
+
+
+
+static int DUMBCALLBACK dumb_memfile_getc(void *f)
+{
+	MEMFILE *mem = f;
+	if (mem->left < 1) return -1; /* nothing left to read */
+	--mem->left;
+	return (unsigned char)*mem->ptr++; /* next byte as 0..255, then advance */
+}
+
+
+
+static int32 DUMBCALLBACK dumb_memfile_getnc(char *ptr, int32 n, void *f)
+{
+	MEMFILE *mem = f;
+	if (mem->left < n) n = mem->left; /* short read: only what remains */
+	memcpy(ptr, mem->ptr, n);
+	mem->left -= n;
+	mem->ptr += n;
+	return n; /* number of bytes actually copied */
+}
+
+
+
+static void DUMBCALLBACK dumb_memfile_close(void *f)
+{
+	free(f); /* frees only the MEMFILE record; the data buffer it points at is not freed here */
+}
+
+
+static int DUMBCALLBACK dumb_memfile_seek(void *f, long n)
+{
+	MEMFILE *m = f;
+	/* Absolute seek to offset n; -1 (position unchanged) if n is outside [0, size]. */
+	if (n < 0 || n > m->size) return -1;
+	m->ptr = m->ptr_begin + n;
+	m->left = m->size - n;
+	return 0;
+}
+
+
+static long DUMBCALLBACK dumb_memfile_get_size(void *f)
+{
+	/* Total length of the wrapped buffer, regardless of the read position. */
+	return ((const MEMFILE *)f)->size;
+}
+
+
+static const DUMBFILE_SYSTEM memfile_dfs = {
+	NULL, /* first slot left empty; presumably the open callback -- TODO confirm against DUMBFILE_SYSTEM */
+	&dumb_memfile_skip,
+	&dumb_memfile_getc,
+	&dumb_memfile_getnc,
+	&dumb_memfile_close,
+	&dumb_memfile_seek,
+	&dumb_memfile_get_size
+};
+
+
+
+DUMBFILE *DUMBEXPORT dumbfile_open_memory(const char *data, int32 size)
+{
+	MEMFILE *m;
+	/* Wraps 'size' bytes at 'data' for reading. The buffer is not copied and is not
+	 * freed by the memfile close callback, so it must outlive the returned file. */
+	if (size < 0) return NULL; /* a negative 'left' would end up as memcpy's count in getnc */
+	m = malloc(sizeof(*m));
+	if (!m) return NULL;
+	m->ptr_begin = m->ptr = data;
+	m->left = m->size = size;
+	return dumbfile_open_ex(m, &memfile_dfs);
+}
diff --git a/libraries/dumb/src/helpers/resamp2.inc b/libraries/dumb/src/helpers/resamp2.inc
new file mode 100644
index 000000000..63b59e94e
--- /dev/null
+++ b/libraries/dumb/src/helpers/resamp2.inc
@@ -0,0 +1,174 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * resamp2.inc - Resampling helper template.          / / \  \
+ *                                                   | <  /   \_
+ * By Bob and entheh.                                |  \/ /\   /
+ *                                                    \_  /  > /
+ * In order to find a good trade-off between            | \ / /
+ * speed and accuracy in this code, some tests          |  ' /
+ * were carried out regarding the behaviour of           \__/
+ * long long ints with gcc. The following code
+ * was tested:
+ *
+ * int a, b, c;
+ * c = ((long long)a * b) >> 16;
+ *
+ * DJGPP GCC Version 3.0.3 generated the following assembly language code for
+ * the multiplication and scaling, leaving the 32-bit result in EAX.
+ *
+ * movl  -8(%ebp), %eax    ; read one int into EAX
+ * imull -4(%ebp)          ; multiply by the other; result goes in EDX:EAX
+ * shrdl $16, %edx, %eax   ; shift EAX right 16, shifting bits in from EDX
+ *
+ * Note that a 32*32->64 multiplication is performed, allowing for high
+ * accuracy. On the Pentium 2 and above, shrdl takes two cycles (generally),
+ * so it is a minor concern when four multiplications are being performed
+ * (the cubic resampler). On the Pentium MMX and earlier, it takes four or
+ * more cycles, so this method is unsuitable for use in the low-quality
+ * resamplers.
+ *
+ * Since "long long" is a gcc-specific extension, we use LONG_LONG instead,
+ * defined in dumb.h. We may investigate later what code MSVC generates, but
+ * if it seems too slow then we suggest you use a good compiler.
+ *
+ * FIXME: these comments are somewhat out of date now.
+ */
+
+
+
+#define SUFFIX3 _2
+
+/* Checks whether pos has run past start/end and, if so, calls the pickup callback. For convenience, returns nonzero on stop. */
+static int process_pickup(DUMB_RESAMPLER *resampler)
+{
+	if (resampler->overshot < 0) { /* NOTE(review): negative overshot on entry appears to request a silent 2-sample priming run -- confirm */
+		resampler->overshot = 0;
+		dumb_resample(resampler, NULL, 2, MONO_DEST_VOLUME_ZEROS, 1.0f); /* Doesn't matter which SUFFIX3. */
+		COPYSRC(resampler->X, 0, resampler->X, 1);
+	}
+	/* Repeat until pos is in range for the current direction, or playback stops. */
+	for (;;) {
+		SRCTYPE *src = resampler->src;
+		/* Save up to three in-range source samples next to pos into X, then recompute the overshoot. */
+		if (resampler->dir < 0) {
+			if (resampler->overshot >= 3 && resampler->pos+3 >= resampler->start) COPYSRC(resampler->X, 0, src, resampler->pos+3);
+			if (resampler->overshot >= 2 && resampler->pos+2 >= resampler->start) COPYSRC(resampler->X, 1, src, resampler->pos+2);
+			if (resampler->overshot >= 1 && resampler->pos+1 >= resampler->start) COPYSRC(resampler->X, 2, src, resampler->pos+1);
+			resampler->overshot = resampler->start - resampler->pos - 1;
+		} else {
+			if (resampler->overshot >= 3 && resampler->pos-3 < resampler->end) COPYSRC(resampler->X, 0, src, resampler->pos-3);
+			if (resampler->overshot >= 2 && resampler->pos-2 < resampler->end) COPYSRC(resampler->X, 1, src, resampler->pos-2);
+			if (resampler->overshot >= 1 && resampler->pos-1 < resampler->end) COPYSRC(resampler->X, 2, src, resampler->pos-1);
+			resampler->overshot = resampler->pos - resampler->end;
+		}
+		/* Negative overshoot: pos >= start (backwards) or pos < end (forwards), so carry on playing. */
+		if (resampler->overshot < 0) {
+			resampler->overshot = 0;
+			return 0;
+		}
+		/* Out of range: without a pickup callback the resampler is stopped (dir = 0). */
+		if (!resampler->pickup) {
+			resampler->dir = 0;
+			return 1;
+		}
+		(*resampler->pickup)(resampler, resampler->pickup_data);
+		if (resampler->dir == 0) return 1; /* the callback chose to stop */
+		ASSERT(resampler->dir == -1 || resampler->dir == 1);
+	}
+}
+
+
+
+/* Mono destination resampler: currently compiled out (#if 0), so only the #undef in the #else branch takes effect. */
+/* SUFFIX3 was set above. */
+#if 0
+#define VOLUME_PARAMETERS MONO_DEST_VOLUME_PARAMETERS
+#define VOLUME_VARIABLES MONO_DEST_VOLUME_VARIABLES
+#define SET_VOLUME_VARIABLES SET_MONO_DEST_VOLUME_VARIABLES
+#define RETURN_VOLUME_VARIABLES RETURN_MONO_DEST_VOLUME_VARIABLES
+#define VOLUMES_ARE_ZERO MONO_DEST_VOLUMES_ARE_ZERO
+#define PEEK_FIR MONO_DEST_PEEK_FIR
+#define MIX_FIR MONO_DEST_MIX_FIR
+#define MIX_ZEROS(op) *dst++ op 0
+#include "resamp3.inc"
+#else
+#undef SUFFIX3
+#endif
+
+/* Create stereo destination resampler: these macros parameterise resamp3.inc, included at the end of this group. */
+#define SUFFIX3 _2
+#define VOLUME_PARAMETERS DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right
+#define VOLUME_VARIABLES lvol, lvolr, lvold, lvolt, lvolm, rvol, rvolr, rvold, rvolt, rvolm
+#define SET_VOLUME_VARIABLES { \
+	if ( volume_left ) { \
+		lvolr = xs_FloorToInt(volume_left->volume * 16777216.f); /* 16777216 = 2^24 fixed-point scale */ \
+		lvold = xs_FloorToInt(volume_left->delta * 16777216.f); \
+		lvolt = xs_FloorToInt(volume_left->target * 16777216.f); \
+		lvolm = xs_FloorToInt(volume_left->mix * 16777216.f); \
+		lvol = MULSCV( lvolr, lvolm ); \
+		if ( lvolr == lvolt ) volume_left = NULL; /* already at target: nothing to write back */ \
+	} else { \
+		lvol = lvolr = 0; /* lvolr is zeroed too so it is never left uninitialised */ \
+		lvold = 0; \
+		lvolt = 0; \
+		lvolm = 0; \
+	} \
+	if ( volume_right ) { \
+		rvolr = xs_FloorToInt(volume_right->volume * 16777216.f); \
+		rvold = xs_FloorToInt(volume_right->delta * 16777216.f); \
+		rvolt = xs_FloorToInt(volume_right->target * 16777216.f); \
+		rvolm = xs_FloorToInt(volume_right->mix * 16777216.f); \
+		rvol = MULSCV( rvolr, rvolm ); \
+		if ( rvolr == rvolt ) volume_right = NULL; \
+	} else { \
+		rvol = rvolr = 0; /* rvolr is zeroed too so it is never left uninitialised */ \
+		rvold = 0; \
+		rvolt = 0; \
+		rvolm = 0; \
+	} \
+}
+#define RETURN_VOLUME_VARIABLES { \
+	if ( volume_left ) volume_left->volume = (float)lvolr / 16777216.0f; \
+	if ( volume_right ) volume_right->volume = (float)rvolr / 16777216.0f; \
+}
+#define VOLUMES_ARE_ZERO (lvol == 0 && lvolt == 0 && rvol == 0 && rvolt == 0)
+#define MIX_ALIAS(op, upd, offset) STEREO_DEST_MIX_ALIAS(op, upd, offset)
+#define MIX_LINEAR(op, upd, o0, o1) STEREO_DEST_MIX_LINEAR(op, upd, o0, o1)
+#define MIX_CUBIC(op, upd, x0, x3, o0, o1, o2, o3) STEREO_DEST_MIX_CUBIC(op, upd, x0, x3, o0, o1, o2, o3)
+#define PEEK_FIR STEREO_DEST_PEEK_FIR
+#define MIX_FIR STEREO_DEST_MIX_FIR
+#define MIX_ZEROS(op) { *dst++ op 0; *dst++ op 0; }
+#include "resamp3.inc"
+
+
+
+#undef STEREO_DEST_MIX_CUBIC
+#undef STEREO_DEST_MIX_LINEAR
+#undef STEREO_DEST_MIX_ALIAS
+#undef MONO_DEST_VOLUMES_ARE_ZERO
+#undef SET_MONO_DEST_VOLUME_VARIABLES
+#undef RETURN_MONO_DEST_VOLUME_VARIABLES
+#undef MONO_DEST_VOLUME_ZEROS
+#undef MONO_DEST_VOLUME_VARIABLES
+#undef MONO_DEST_VOLUME_PARAMETERS
+#undef STEREO_DEST_PEEK_ALIAS
+#undef POKE_ALIAS
+#undef MONO_DEST_PEEK_FIR
+#undef STEREO_DEST_PEEK_FIR
+#undef MONO_DEST_MIX_FIR
+#undef STEREO_DEST_MIX_FIR
+#undef ADVANCE_FIR
+#undef POKE_FIR
+#undef COPYSRC2
+#undef COPYSRC
+#undef DIVIDE_BY_SRC_CHANNELS
+#undef SRC_CHANNELS
+#undef SUFFIX2
diff --git a/libraries/dumb/src/helpers/resamp3.inc b/libraries/dumb/src/helpers/resamp3.inc
new file mode 100644
index 000000000..5fc13618b
--- /dev/null
+++ b/libraries/dumb/src/helpers/resamp3.inc
@@ -0,0 +1,436 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * resamp3.inc - Resampling helper template.          / / \  \
+ *                                                   | <  /   \_
+ * By Bob and entheh.                                |  \/ /\   /
+ *                                                    \_  /  > /
+ * In order to find a good trade-off between            | \ / /
+ * speed and accuracy in this code, some tests          |  ' /
+ * were carried out regarding the behaviour of           \__/
+ * long long ints with gcc. The following code
+ * was tested:
+ *
+ * int a, b, c;
+ * c = ((long long)a * b) >> 16;
+ *
+ * DJGPP GCC Version 3.0.3 generated the following assembly language code for
+ * the multiplication and scaling, leaving the 32-bit result in EAX.
+ *
+ * movl  -8(%ebp), %eax    ; read one int into EAX
+ * imull -4(%ebp)          ; multiply by the other; result goes in EDX:EAX
+ * shrdl $16, %edx, %eax   ; shift EAX right 16, shifting bits in from EDX
+ *
+ * Note that a 32*32->64 multiplication is performed, allowing for high
+ * accuracy. On the Pentium 2 and above, shrdl takes two cycles (generally),
+ * so it is a minor concern when four multiplications are being performed
+ * (the cubic resampler). On the Pentium MMX and earlier, it takes four or
+ * more cycles, so this method is unsuitable for use in the low-quality
+ * resamplers.
+ *
+ * Since "long long" is a gcc-specific extension, we use LONG_LONG instead,
+ * defined in dumb.h. We may investigate later what code MSVC generates, but
+ * if it seems too slow then we suggest you use a good compiler.
+ *
+ * FIXME: these comments are somewhat out of date now.
+ */
+
+
+
+int32 dumb_resample(DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, VOLUME_PARAMETERS, double delta)
+{
+	int dt, inv_dt;
+	int VOLUME_VARIABLES;
+	long done;
+	long todo;
+	double tododbl;
+	int quality;
+	/* Mixes up to dst_size resampled samples into dst (or only advances the position when dst is NULL); returns the number produced. */
+	if (!resampler || resampler->dir == 0) return 0; /* missing or stopped resampler: nothing to do */
+	ASSERT(resampler->dir == -1 || resampler->dir == 1);
+	/* dt is delta in 16.16 fixed point; give up if it rounds to 0 or comes out as 0x80000000. */
+	done = 0;
+	dt = xs_CRoundToInt(delta * 65536.0);
+	if (dt == 0 || dt == 0x80000000) return 0;
+	inv_dt = xs_CRoundToInt(1.0 / delta * 65536.0);
+	SET_VOLUME_VARIABLES;
+	/* With all volumes zero nothing needs mixing: a NULL dst selects the "silence or simulation" paths below. */
+	if (VOLUMES_ARE_ZERO) dst = NULL;
+
+	_dumb_init_cubic();
+
+	quality = resampler->quality;
+	/* Work in runs: each run ends when enough output exists or pos reaches the edge of [start, end). */
+	while (done < dst_size) {
+		if (process_pickup(resampler)) {
+			RETURN_VOLUME_VARIABLES;
+			return done;
+		}
+		/* Give dt the same sign as the playback direction. */
+		if ((resampler->dir ^ dt) < 0)
+			dt = -dt;
+		/* Estimate how many output samples fit before pos would leave the range. */
+		if (resampler->dir < 0)
+			tododbl = ((resampler->pos - resampler->start) * 65536.f + (resampler->subpos - dt)) / -dt;
+		else
+			tododbl = ((resampler->end - resampler->pos) * 65536.f - (resampler->subpos + 1 - dt)) / dt;
+		/* Clamp the run length to [0, samples still wanted]. */
+		if (tododbl <= 0)
+			todo = 0;
+		else if (tododbl >= dst_size - done)
+			todo = dst_size - done;
+		else
+			todo = xs_FloorToInt(tododbl);
+
+		done += todo;
+
+		{
+			SRCTYPE *src = resampler->src;
+			long pos = resampler->pos;
+			int subpos = resampler->subpos;
+			long diff = pos; /* remembers the starting pos; becomes the distance travelled below */
+			long overshot;
+			if (resampler->dir < 0) {
+				if (!dst) {
+					/* Silence or simulation */
+					LONG_LONG new_subpos = subpos + (LONG_LONG)dt * todo;
+					pos += (long)(new_subpos >> 16);
+					subpos = (long)new_subpos & 65535;
+				} else if (quality <= DUMB_RQ_ALIASING) {
+					/* Aliasing, backwards */
+					SRCTYPE xbuf[2*SRC_CHANNELS];
+					SRCTYPE *x = &xbuf[0];
+					SRCTYPE *xstart;
+					COPYSRC(xbuf, 0, resampler->X, 1);
+					COPYSRC(xbuf, 1, resampler->X, 2);
+					while (todo && x < &xbuf[2*SRC_CHANNELS]) {
+						// TODO: check what happens when multiple tempo slides occur per row
+						HEAVYASSERT(pos >= resampler->start);
+						MIX_ALIAS(+=, 1, 0);
+						subpos += dt;
+						pos += subpos >> 16;
+						x -= (subpos >> 16) * SRC_CHANNELS;
+						subpos &= 65535;
+						todo--;
+					}
+					x = xstart = &src[pos*SRC_CHANNELS];
+					LOOP4(todo,
+						MIX_ALIAS(+=, 1, 2);
+						subpos += dt;
+						x += (subpos >> 16) * SRC_CHANNELS;
+						subpos &= 65535;
+					);
+					pos += DIVIDE_BY_SRC_CHANNELS(x - xstart);
+				} else if (quality <= DUMB_LQ_LINEAR) {
+					/* Linear interpolation, backwards */
+					SRCTYPE xbuf[3*SRC_CHANNELS];
+					SRCTYPE *x = &xbuf[1*SRC_CHANNELS];
+					COPYSRC(xbuf, 0, resampler->X, 1);
+					COPYSRC(xbuf, 1, resampler->X, 2);
+					COPYSRC(xbuf, 2, src, pos);
+					while (todo && x < &xbuf[3*SRC_CHANNELS]) {
+						HEAVYASSERT(pos >= resampler->start);
+						MIX_LINEAR(+=, 1, 0, -1);
+						subpos += dt;
+						pos += subpos >> 16;
+						x -= (subpos >> 16) * SRC_CHANNELS;
+						subpos &= 65535;
+						todo--;
+					}
+					// TODO: use xstart for others too
+					x = &src[pos*SRC_CHANNELS];
+					LOOP4(todo,
+						HEAVYASSERT(pos >= resampler->start);
+						MIX_LINEAR(+=, 1, 1, 2);
+						subpos += dt;
+						pos += subpos >> 16;
+						x += (subpos >> 16) * SRC_CHANNELS;
+						subpos &= 65535;
+					);
+				} else if (quality <= DUMB_LQ_CUBIC) {
+					/* Cubic interpolation, backwards */
+					SRCTYPE xbuf[6*SRC_CHANNELS];
+					SRCTYPE *x = &xbuf[3*SRC_CHANNELS];
+					COPYSRC(xbuf, 0, resampler->X, 0);
+					COPYSRC(xbuf, 1, resampler->X, 1);
+					COPYSRC(xbuf, 2, resampler->X, 2);
+					COPYSRC(xbuf, 3, src, pos);
+					if (pos-1 >= resampler->start) COPYSRC(xbuf, 4, src, pos-1);
+					if (pos-2 >= resampler->start) COPYSRC(xbuf, 5, src, pos-2);
+					while (todo && x < &xbuf[6*SRC_CHANNELS]) {
+						HEAVYASSERT(pos >= resampler->start);
+						MIX_CUBIC(+=, 1, x, x, 0, -1, -2, -3);
+						subpos += dt;
+						pos += subpos >> 16;
+						x -= (subpos >> 16) * SRC_CHANNELS;
+						subpos &= 65535;
+						todo--;
+					}
+					x = &src[pos*SRC_CHANNELS];
+					LOOP4(todo,
+						HEAVYASSERT(pos >= resampler->start);
+						MIX_CUBIC(+=, 1, x, x, 0, 1, 2, 3);
+						subpos += dt;
+						pos += subpos >> 16;
+						x += (subpos >> 16) * SRC_CHANNELS;
+						subpos &= 65535;
+					);
+				} else {
+					/* FIR resampling, backwards */
+					SRCTYPE *x;
+					if ( resampler->fir_resampler_ratio != delta ) {
+						resampler_set_rate( resampler->fir_resampler[0], delta );
+						resampler_set_rate( resampler->fir_resampler[1], delta );
+						resampler->fir_resampler_ratio = delta;
+					}
+					x = &src[pos*SRC_CHANNELS];
+					while ( todo ) {
+							while ( ( resampler_get_free_count( resampler->fir_resampler[0] ) ||
+							(!resampler_get_sample_count( resampler->fir_resampler[0] )
+	#if SRC_CHANNELS == 2
+							&& !resampler_get_sample_count( resampler->fir_resampler[1] )
+	#endif
+							) ) && pos >= resampler->start )
+							{
+									POKE_FIR(0);
+									pos--;
+									x -= SRC_CHANNELS;
+							}
+							if ( !resampler_get_sample_count( resampler->fir_resampler[0] ) ) break;
+							MIX_FIR;
+							ADVANCE_FIR;
+							--todo;
+					}
+					done -= todo; /* the loop may stop early: un-count the samples that were not produced */
+				}
+				diff = diff - pos; /* source samples stepped over in this run (moving backwards) */
+				overshot = resampler->start - pos - 1;
+				if (diff >= 3) {
+					COPYSRC2(resampler->X, 0, overshot < 3, src, pos+3);
+					COPYSRC2(resampler->X, 1, overshot < 2, src, pos+2);
+					COPYSRC2(resampler->X, 2, overshot < 1, src, pos+1);
+				} else if (diff >= 2) {
+					COPYSRC(resampler->X, 0, resampler->X, 2);
+					COPYSRC2(resampler->X, 1, overshot < 2, src, pos+2);
+					COPYSRC2(resampler->X, 2, overshot < 1, src, pos+1);
+				} else if (diff >= 1) {
+					COPYSRC(resampler->X, 0, resampler->X, 1);
+					COPYSRC(resampler->X, 1, resampler->X, 2);
+					COPYSRC2(resampler->X, 2, overshot < 1, src, pos+1);
+				}
+			} else {
+				if (!dst) {
+					/* Silence or simulation */
+					LONG_LONG new_subpos = subpos + (LONG_LONG)dt * todo;
+					pos += (long)(new_subpos >> 16);
+					subpos = (long)new_subpos & 65535;
+				} else if (quality <= DUMB_RQ_ALIASING) {
+					/* Aliasing, forwards */
+					SRCTYPE xbuf[2*SRC_CHANNELS];
+					SRCTYPE *x = &xbuf[0];
+					SRCTYPE *xstart;
+					COPYSRC(xbuf, 0, resampler->X, 1);
+					COPYSRC(xbuf, 1, resampler->X, 2);
+					while (todo && x < &xbuf[2*SRC_CHANNELS]) {
+						HEAVYASSERT(pos < resampler->end);
+						MIX_ALIAS(+=, 1, 0);
+						subpos += dt;
+						pos += subpos >> 16;
+						x += (subpos >> 16) * SRC_CHANNELS;
+						subpos &= 65535;
+						todo--;
+					}
+					x = xstart = &src[pos*SRC_CHANNELS];
+					LOOP4(todo,
+						MIX_ALIAS(+=, 1, -2);
+						subpos += dt;
+						x += (subpos >> 16) * SRC_CHANNELS;
+						subpos &= 65535;
+					);
+					pos += DIVIDE_BY_SRC_CHANNELS(x - xstart);
+				} else if (quality <= DUMB_LQ_LINEAR) {
+					/* Linear interpolation, forwards */
+					SRCTYPE xbuf[3*SRC_CHANNELS];
+					SRCTYPE *x = &xbuf[1*SRC_CHANNELS];
+					COPYSRC(xbuf, 0, resampler->X, 1);
+					COPYSRC(xbuf, 1, resampler->X, 2);
+					COPYSRC(xbuf, 2, src, pos);
+					while (todo && x < &xbuf[3*SRC_CHANNELS]) {
+						HEAVYASSERT(pos < resampler->end);
+						MIX_LINEAR(+=, 1, -1, 0);
+						subpos += dt;
+						pos += subpos >> 16;
+						x += (subpos >> 16) * SRC_CHANNELS;
+						subpos &= 65535;
+						todo--;
+					}
+					x = &src[pos*SRC_CHANNELS];
+					LOOP4(todo,
+						HEAVYASSERT(pos < resampler->end);
+						MIX_LINEAR(+=, 1, -2, -1);
+						subpos += dt;
+						pos += subpos >> 16;
+						x += (subpos >> 16) * SRC_CHANNELS;
+						subpos &= 65535;
+					);
+				} else if (quality <= DUMB_LQ_CUBIC) {
+					/* Cubic interpolation, forwards */
+					SRCTYPE xbuf[6*SRC_CHANNELS];
+					SRCTYPE *x = &xbuf[3*SRC_CHANNELS];
+					COPYSRC(xbuf, 0, resampler->X, 0);
+					COPYSRC(xbuf, 1, resampler->X, 1);
+					COPYSRC(xbuf, 2, resampler->X, 2);
+					COPYSRC(xbuf, 3, src, pos);
+					if (pos+1 < resampler->end) COPYSRC(xbuf, 4, src, pos+1);
+					if (pos+2 < resampler->end) COPYSRC(xbuf, 5, src, pos+2);
+					while (todo && x < &xbuf[6*SRC_CHANNELS]) {
+						HEAVYASSERT(pos < resampler->end);
+						MIX_CUBIC(+=, 1, x, x, -3, -2, -1, 0);
+						subpos += dt;
+						pos += subpos >> 16;
+						x += (subpos >> 16) * SRC_CHANNELS;
+						subpos &= 65535;
+						todo--;
+					}
+					x = &src[pos*SRC_CHANNELS];
+					LOOP4(todo,
+						HEAVYASSERT(pos < resampler->end);
+						MIX_CUBIC(+=, 1, x, x, -3, -2, -1, 0);
+						subpos += dt;
+						pos += subpos >> 16;
+						x += (subpos >> 16) * SRC_CHANNELS;
+						subpos &= 65535;
+					);
+				} else {
+					/* FIR resampling, forwards */
+					SRCTYPE *x;
+					if ( resampler->fir_resampler_ratio != delta ) {
+						resampler_set_rate( resampler->fir_resampler[0], delta );
+						resampler_set_rate( resampler->fir_resampler[1], delta );
+						resampler->fir_resampler_ratio = delta;
+					}
+					x = &src[pos*SRC_CHANNELS];
+					while ( todo ) {
+							while ( ( resampler_get_free_count( resampler->fir_resampler[0] ) ||
+							(!resampler_get_sample_count( resampler->fir_resampler[0] )
+	#if SRC_CHANNELS == 2
+							&& !resampler_get_sample_count( resampler->fir_resampler[1] )
+	#endif
+							) ) && pos < resampler->end )
+							{
+									POKE_FIR(0);
+									pos++;
+									x += SRC_CHANNELS;
+							}
+							if ( !resampler_get_sample_count( resampler->fir_resampler[0] ) ) break;
+							MIX_FIR;
+							ADVANCE_FIR;
+							--todo;
+					}
+					done -= todo; /* the loop may stop early: un-count the samples that were not produced */
+				}
+				diff = pos - diff; /* source samples stepped over in this run (moving forwards) */
+				overshot = pos - resampler->end;
+				if (diff >= 3) {
+					COPYSRC2(resampler->X, 0, overshot < 3, src, pos-3);
+					COPYSRC2(resampler->X, 1, overshot < 2, src, pos-2);
+					COPYSRC2(resampler->X, 2, overshot < 1, src, pos-1);
+				} else if (diff >= 2) {
+					COPYSRC(resampler->X, 0, resampler->X, 2);
+					COPYSRC2(resampler->X, 1, overshot < 2, src, pos-2);
+					COPYSRC2(resampler->X, 2, overshot < 1, src, pos-1);
+				} else if (diff >= 1) {
+					COPYSRC(resampler->X, 0, resampler->X, 1);
+					COPYSRC(resampler->X, 1, resampler->X, 2);
+					COPYSRC2(resampler->X, 2, overshot < 1, src, pos-1);
+				}
+			}
+			resampler->pos = pos;
+			resampler->subpos = subpos;
+		}
+	}
+
+	RETURN_VOLUME_VARIABLES;
+	return done;
+}
+
+
+
+void dumb_resample_get_current_sample(DUMB_RESAMPLER *resampler, VOLUME_PARAMETERS, sample_t *dst)
+{
+	int VOLUME_VARIABLES;
+	SRCTYPE *src;
+	long pos;
+	int subpos;
+	int quality;
+	SRCTYPE *x;
+	/* Stores (op '=', not '+=') the sample at the current position into dst; pos/subpos are only read here, not written back. */
+	if (!resampler || resampler->dir == 0) { MIX_ZEROS(=); return; } /* missing or stopped: output zeros */
+	ASSERT(resampler->dir == -1 || resampler->dir == 1);
+
+	if (process_pickup(resampler)) { MIX_ZEROS(=); return; } /* playback stopped during pickup */
+
+	SET_VOLUME_VARIABLES;
+
+	if (VOLUMES_ARE_ZERO) { MIX_ZEROS(=); return; }
+
+	_dumb_init_cubic();
+
+	quality = resampler->quality;
+
+	src = resampler->src;
+	pos = resampler->pos;
+	subpos = resampler->subpos;
+	x = resampler->X;
+	/* Pick the interpolator by quality; the MIX_* macros are invoked with upd = 0 here. */
+	if (resampler->dir < 0) {
+		HEAVYASSERT(pos >= resampler->start);
+		if (quality <= DUMB_RQ_ALIASING) {
+			/* Aliasing, backwards */
+			MIX_ALIAS(=, 0, 1);
+		} else if (quality <= DUMB_LQ_LINEAR) {
+			/* Linear interpolation, backwards */
+			MIX_LINEAR(=, 0, 2, 1);
+		} else if (quality <= DUMB_LQ_CUBIC) {
+			/* Cubic interpolation, backwards */
+			MIX_CUBIC(=, 0, src, x, pos, 2, 1, 0);
+		} else {
+			/* FIR resampling, backwards */
+			PEEK_FIR;
+		}
+	} else {
+		HEAVYASSERT(pos < resampler->end);
+		if (quality <= DUMB_RQ_ALIASING) {
+			/* Aliasing, forwards */
+			MIX_ALIAS(=, 0, 1);
+		} else if (quality <= DUMB_LQ_LINEAR) {
+			/* Linear interpolation, forwards */
+			MIX_LINEAR(=, 0, 1, 2);
+		} else if (quality <= DUMB_LQ_CUBIC) {
+			/* Cubic interpolation, forwards */
+			MIX_CUBIC(=, 0, x, src, 0, 1, 2, pos);
+		} else {
+			/* FIR resampling, forwards */
+			PEEK_FIR;
+		}
+	}
+}
+
+
+
+#undef MIX_ZEROS
+#undef MIX_FIR
+#undef PEEK_FIR
+#undef VOLUMES_ARE_ZERO
+#undef SET_VOLUME_VARIABLES
+#undef RETURN_VOLUME_VARIABLES
+#undef VOLUME_VARIABLES
+#undef VOLUME_PARAMETERS
+#undef SUFFIX3
diff --git a/libraries/dumb/src/helpers/resample.c b/libraries/dumb/src/helpers/resample.c
new file mode 100644
index 000000000..30a60d8da
--- /dev/null
+++ b/libraries/dumb/src/helpers/resample.c
@@ -0,0 +1,420 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * resample.c - Resampling helpers.                   / / \  \
+ *                                                   | <  /   \_
+ * By Bob and entheh.                                |  \/ /\   /
+ *                                                    \_  /  > /
+ * In order to find a good trade-off between            | \ / /
+ * speed and accuracy in this code, some tests          |  ' /
+ * were carried out regarding the behaviour of           \__/
+ * long long ints with gcc. The following code
+ * was tested:
+ *
+ * int a, b, c;
+ * c = ((long long)a * b) >> 16;
+ *
+ * DJGPP GCC Version 3.0.3 generated the following assembly language code for
+ * the multiplication and scaling, leaving the 32-bit result in EAX.
+ *
+ * movl  -8(%ebp), %eax    ; read one int into EAX
+ * imull -4(%ebp)          ; multiply by the other; result goes in EDX:EAX
+ * shrdl $16, %edx, %eax   ; shift EAX right 16, shifting bits in from EDX
+ *
+ * Note that a 32*32->64 multiplication is performed, allowing for high
+ * accuracy. On the Pentium 2 and above, shrdl takes two cycles (generally),
+ * so it is a minor concern when four multiplications are being performed
+ * (the cubic resampler). On the Pentium MMX and earlier, it takes four or
+ * more cycles, so this method is unsuitable for use in the low-quality
+ * resamplers.
+ *
+ * Since "long long" is a gcc-specific extension, we use LONG_LONG instead,
+ * defined in dumb.h. We may investigate later what code MSVC generates, but
+ * if it seems too slow then we suggest you use a good compiler.
+ *
+ * FIXME: these comments are somewhat out of date now.
+ */
+
+#include <math.h>
+#include "dumb.h"
+
+#include "internal/resampler.h"
+#include "internal/mulsc.h"
+
+
+
+/* Compile with -DHEAVYDEBUG if you want to make sure the pick-up function is
+ * called when it should be. There will be a considerable performance hit,
+ * since at least one condition has to be tested for every sample generated.
+ */
+#ifdef HEAVYDEBUG
+#define HEAVYASSERT(cond) ASSERT(cond)
+#else
+#define HEAVYASSERT(cond)
+#endif
+
+
+
+/* Make MSVC shut the hell up about if ( upd ) UPDATE_VOLUME() conditions being constant */
+#ifdef _MSC_VER
+#pragma warning(disable:4127 4701)
+#endif
+
+
+
+/* A global variable for controlling resampling quality wherever a local
+ * specification doesn't override it. The following values are valid:
+ *
+ *  0 - DUMB_RQ_ALIASING - fastest
+ *  1 - DUMB_RQ_BLEP     - nicer than aliasing, but slower
+ *  2 - DUMB_RQ_LINEAR
+ *  3 - DUMB_RQ_BLAM     - band-limited linear interpolation, nice but slower
+ *  4 - DUMB_RQ_CUBIC
+ *  5 - DUMB_RQ_FIR      - nicest
+ *
+ * Values outside the range 0-5 will behave the same as the nearest
+ * value within the range.
+ */
+int dumb_resampling_quality = DUMB_RQ_CUBIC;
+
+
+
+/* From xs_Float.h ==============================================*/
+#if __BIG_ENDIAN__
+	#define _xs_iman_				1
+#else
+	#define _xs_iman_				0
+#endif //BigEndian_
+
+#ifdef __GNUC__
+#define finline inline
+#else
+#define finline __forceinline
+#endif
+
+union _xs_doubleints
+{
+	double val;
+	unsigned int ival[2];
+};
+
+static const double _xs_doublemagic			= (6755399441055744.0); 	//2^52 * 1.5,  uses limited precision to floor
+static const double _xs_doublemagicroundeps	= (.5f-(1.5e-8));			//almost .5f = .5f - 1e^(number of exp bit)
+
+static finline int xs_CRoundToInt(double val)
+{
+	/* After adding 1.5 * 2^52 the rounded integer sits in the least-significant word of the double. */
+	union _xs_doubleints bits;
+	bits.val = val + _xs_doublemagic;
+	return bits.ival[_xs_iman_];
+}
+static finline int xs_FloorToInt(double val)
+{
+	/* Bias down by just under 0.5 first, so the magic-number rounding below acts as a floor. */
+	union _xs_doubleints bits;
+	val -= _xs_doublemagicroundeps;
+	bits.val = val + _xs_doublemagic;
+	return bits.ival[_xs_iman_];
+}
+/* Not from xs_Float.h ==========================================*/
+
+
+/* Executes the content 'iterator' times.
+ * Clobbers the 'iterator' variable.
+ * The unrolled-by-four variant is compiled out (#if 0); a plain loop is used instead.
+ */
+#if 0
+#define LOOP4(iterator, CONTENT) \
+{ \
+	if ((iterator) & 2) { \
+		CONTENT; \
+		CONTENT; \
+	} \
+	if ((iterator) & 1) { \
+		CONTENT; \
+	} \
+	(iterator) >>= 2; \
+	while (iterator) { \
+		CONTENT; \
+		CONTENT; \
+		CONTENT; \
+		CONTENT; \
+		(iterator)--; \
+	} \
+}
+#else
+#define LOOP4(iterator, CONTENT) \
+{ \
+	while ( (iterator)-- ) \
+	{ \
+		CONTENT; \
+	} \
+}
+#endif
+
+#define PASTERAW(a, b) a ## b /* This does not expand macros in b ... */
+#define PASTE(a, b) PASTERAW(a, b) /* ... but b is expanded during this substitution. */
+
+#define X PASTE(x.x, SRCBITS)
+
+
+
+/* Cubic resampler: look-up tables
+ *
+ * a = 1.5*x1 - 1.5*x2 + 0.5*x3 - 0.5*x0
+ * b = 2*x2 + x0 - 2.5*x1 - 0.5*x3
+ * c = 0.5*x2 - 0.5*x0
+ * d = x1
+ *
+ * x = a*t*t*t + b*t*t + c*t + d
+ *   = (-0.5*x0 + 1.5*x1 - 1.5*x2 + 0.5*x3) * t*t*t +
+ *     (   1*x0 - 2.5*x1 + 2  *x2 - 0.5*x3) * t*t +
+ *     (-0.5*x0          + 0.5*x2         ) * t +
+ *     (            1*x1                  )
+ *   = (-0.5*t*t*t + 1  *t*t - 0.5*t    ) * x0 +
+ *     ( 1.5*t*t*t - 2.5*t*t         + 1) * x1 +
+ *     (-1.5*t*t*t + 2  *t*t + 0.5*t    ) * x2 +
+ *     ( 0.5*t*t*t - 0.5*t*t            ) * x3
+ *   = A0(t) * x0 + A1(t) * x1 + A2(t) * x2 + A3(t) * x3
+ *
+ * A0, A1, A2 and A3 stay within the range [-1,1].
+ * In the tables, they are scaled with 14 fractional bits.
+ *
+ * Turns out we don't need to store A2 and A3; they are symmetrical to A1 and A0.
+ *
+ * TODO: A0 and A3 stay very small indeed. Consider different scale/resolution?
+ */
+
+static short cubicA0[1025], cubicA1[1025];
+
+void _dumb_init_cubic(void)
+{
+	/* Set once the tables are filled, so later calls return immediately. */
+	static int tables_ready = 0;
+	unsigned int i; /* 3*1024*1024*1024 is within range if it's unsigned */
+	if (tables_ready) return;
+	/* Fill entries 0..1024; the int casts pacify warnings about negating unsigned values. */
+	for (i = 0; i <= 1024; i++) {
+		cubicA0[i] = -(int)(  i*i*i >> 17) + (int)(  i*i >> 6) - (int)(i << 3);
+		cubicA1[i] =  (int)(3*i*i*i >> 17) - (int)(5*i*i >> 7) + (int)(1 << 14);
+	}
+	resampler_init();
+	tables_ready = 1;
+}
+
+
+
+/* Create resamplers for 24-in-32-bit source samples. */
+
+/* #define SUFFIX
+ * MSVC warns if we try to paste a null SUFFIX, so instead we define
+ * special macros for the function names that don't bother doing the
+ * corresponding paste. The more generic definitions are further down.
+ */
+#define process_pickup PASTE(process_pickup, SUFFIX2)
+#define dumb_resample PASTE(PASTE(dumb_resample, SUFFIX2), SUFFIX3)
+#define dumb_resample_get_current_sample PASTE(PASTE(dumb_resample_get_current_sample, SUFFIX2), SUFFIX3)
+/* Parameters for the unsuffixed instantiation of resample.inc (included at the end of this group). */
+#define SRCTYPE sample_t
+#define SRCBITS 24 /* pasted by the X macro above, which then expands to x.x24 */
+#define ALIAS(x, vol) MULSC(x, vol)
+#define LINEAR(x0, x1) (x0 + MULSC(x1 - x0, subpos))
+#define CUBIC(x0, x1, x2, x3) ( \
+	MULSC(x0, cubicA0[subpos >> 6] << 2) + \
+	MULSC(x1, cubicA1[subpos >> 6] << 2) + \
+	MULSC(x2, cubicA1[1 + (subpos >> 6 ^ 1023)] << 2) + \
+	MULSC(x3, cubicA0[1 + (subpos >> 6 ^ 1023)] << 2))
+#define CUBICVOL(x, vol) MULSC(x, vol)
+#define FIR(x) (x >> 8) /* drops the low 8 bits; compare (x) and (x << 8) in the 16- and 8-bit variants */
+#include "resample.inc"
+
+/* Undefine the simplified macros. */
+#undef dumb_resample_get_current_sample
+#undef dumb_resample
+#undef process_pickup
+
+
+/* Now define the proper ones that use SUFFIX. */
+#define dumb_reset_resampler PASTE(dumb_reset_resampler, SUFFIX)
+#define dumb_start_resampler PASTE(dumb_start_resampler, SUFFIX)
+#define process_pickup PASTE(PASTE(process_pickup, SUFFIX), SUFFIX2)
+#define dumb_resample PASTE(PASTE(PASTE(dumb_resample, SUFFIX), SUFFIX2), SUFFIX3)
+#define dumb_resample_get_current_sample PASTE(PASTE(PASTE(dumb_resample_get_current_sample, SUFFIX), SUFFIX2), SUFFIX3)
+#define dumb_end_resampler PASTE(dumb_end_resampler, SUFFIX)
+
+/* Create resamplers for 16-bit source samples. */
+#define SUFFIX _16
+#define SRCTYPE short
+#define SRCBITS 16
+#define ALIAS(x, vol) (x * vol >> 8)
+#define LINEAR(x0, x1) ((x0 << 8) + MULSC16(x1 - x0, subpos))
+#define CUBIC(x0, x1, x2, x3) ( \
+	x0 * cubicA0[subpos >> 6] + \
+	x1 * cubicA1[subpos >> 6] + \
+	x2 * cubicA1[1 + (subpos >> 6 ^ 1023)] + \
+	x3 * cubicA0[1 + (subpos >> 6 ^ 1023)])
+#define CUBICVOL(x, vol) MULSCV((x), ((vol) << 10))
+#define FIR(x) (x)
+#include "resample.inc"
+
+/* Create resamplers for 8-bit source samples. */
+#define SUFFIX _8
+#define SRCTYPE signed char
+#define SRCBITS 8
+#define ALIAS(x, vol) (x * vol)
+#define LINEAR(x0, x1) ((x0 << 16) + (x1 - x0) * subpos)
+#define CUBIC(x0, x1, x2, x3) (( \
+	x0 * cubicA0[subpos >> 6] + \
+	x1 * cubicA1[subpos >> 6] + \
+	x2 * cubicA1[1 + (subpos >> 6 ^ 1023)] + \
+	x3 * cubicA0[1 + (subpos >> 6 ^ 1023)]) << 6)
+#define CUBICVOL(x, vol) MULSCV((x), ((vol) << 12))
+#define FIR(x) (x << 8)
+#include "resample.inc"
+
+
+#undef dumb_reset_resampler
+#undef dumb_start_resampler
+#undef process_pickup
+#undef dumb_resample
+#undef dumb_resample_get_current_sample
+#undef dumb_end_resampler
+
+
+
+void dumb_reset_resampler_n(int n, DUMB_RESAMPLER *resampler, void *src, int src_channels, int32 pos, int32 start, int32 end, int quality)
+{
+	/* n selects the source sample width: 8 and 16 pick the narrow variants, any other value the un-suffixed one. */
+	switch (n) {
+	case 8:  dumb_reset_resampler_8(resampler, src, src_channels, pos, start, end, quality); break;
+	case 16: dumb_reset_resampler_16(resampler, src, src_channels, pos, start, end, quality); break;
+	default: dumb_reset_resampler(resampler, src, src_channels, pos, start, end, quality); break;
+	}
+}
+
+
+
+DUMB_RESAMPLER *dumb_start_resampler_n(int n, void *src, int src_channels, int32 pos, int32 start, int32 end, int quality)
+{
+	/* Returns whatever the width-specific start function returns: _8 for n == 8, _16 for n == 16, un-suffixed otherwise. */
+	switch (n) {
+	case 8:  return dumb_start_resampler_8(src, src_channels, pos, start, end, quality);
+	case 16: return dumb_start_resampler_16(src, src_channels, pos, start, end, quality);
+	default: return dumb_start_resampler(src, src_channels, pos, start, end, quality);
+	}
+}
+
+
+#if 0
+int32 dumb_resample_n_1_1(int n, DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume, double delta)
+{
+	/* Compiled out by the enclosing #if 0; the 1_1 counterpart of the width dispatchers that are built. */
+	switch (n) {
+	case 8:  return dumb_resample_8_1_1(resampler, dst, dst_size, volume, delta);
+	case 16: return dumb_resample_16_1_1(resampler, dst, dst_size, volume, delta);
+	default: return dumb_resample_1_1(resampler, dst, dst_size, volume, delta);
+	}
+}
+#endif
+
+
+int32 dumb_resample_n_1_2(int n, DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta)
+{
+	/* Forwards to the _1_2 resampler for the given source width (8, 16, or un-suffixed) and returns its result unchanged. */
+	switch (n) {
+	case 8:  return dumb_resample_8_1_2(resampler, dst, dst_size, volume_left, volume_right, delta);
+	case 16: return dumb_resample_16_1_2(resampler, dst, dst_size, volume_left, volume_right, delta);
+	default: return dumb_resample_1_2(resampler, dst, dst_size, volume_left, volume_right, delta);
+	}
+}
+
+
+#if 0
+int32 dumb_resample_n_2_1(int n, DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta)
+{
+	/* Compiled out by the enclosing #if 0; the 2_1 counterpart of the width dispatchers that are built. */
+	switch (n) {
+	case 8:  return dumb_resample_8_2_1(resampler, dst, dst_size, volume_left, volume_right, delta);
+	case 16: return dumb_resample_16_2_1(resampler, dst, dst_size, volume_left, volume_right, delta);
+	default: return dumb_resample_2_1(resampler, dst, dst_size, volume_left, volume_right, delta);
+	}
+}
+#endif
+
+
+int32 dumb_resample_n_2_2(int n, DUMB_RESAMPLER *resampler, sample_t *dst, int32 dst_size, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, double delta)
+{
+	/* Forwards to the _2_2 resampler for the given source width (8, 16, or un-suffixed) and returns its result unchanged. */
+	switch (n) {
+	case 8:  return dumb_resample_8_2_2(resampler, dst, dst_size, volume_left, volume_right, delta);
+	case 16: return dumb_resample_16_2_2(resampler, dst, dst_size, volume_left, volume_right, delta);
+	default: return dumb_resample_2_2(resampler, dst, dst_size, volume_left, volume_right, delta);
+	}
+}
+
+
+#if 0
+void dumb_resample_get_current_sample_n_1_1(int n, DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume, sample_t *dst)
+{
+	/* Compiled out by the enclosing #if 0; the 1_1 counterpart of the current-sample dispatchers that are built. */
+	switch (n) {
+	case 8:  dumb_resample_get_current_sample_8_1_1(resampler, volume, dst); break;
+	case 16: dumb_resample_get_current_sample_16_1_1(resampler, volume, dst); break;
+	default: dumb_resample_get_current_sample_1_1(resampler, volume, dst); break;
+	}
+}
+#endif
+
+
+void dumb_resample_get_current_sample_n_1_2(int n, DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst)
+{
+	/* Forwards to the _1_2 current-sample routine for the given source width (8, 16, or un-suffixed). */
+	switch (n) {
+	case 8:  dumb_resample_get_current_sample_8_1_2(resampler, volume_left, volume_right, dst); break;
+	case 16: dumb_resample_get_current_sample_16_1_2(resampler, volume_left, volume_right, dst); break;
+	default: dumb_resample_get_current_sample_1_2(resampler, volume_left, volume_right, dst); break;
+	}
+}
+
+
+#if 0
+void dumb_resample_get_current_sample_n_2_1(int n, DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst)
+{
+	/* Compiled out by the enclosing #if 0; the 2_1 counterpart of the current-sample dispatchers that are built. */
+	switch (n) {
+	case 8:  dumb_resample_get_current_sample_8_2_1(resampler, volume_left, volume_right, dst); break;
+	case 16: dumb_resample_get_current_sample_16_2_1(resampler, volume_left, volume_right, dst); break;
+	default: dumb_resample_get_current_sample_2_1(resampler, volume_left, volume_right, dst); break;
+	}
+}
+#endif
+
+
+void dumb_resample_get_current_sample_n_2_2(int n, DUMB_RESAMPLER *resampler, DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right, sample_t *dst)
+{
+	/* Forwards to the _2_2 current-sample routine for the given source width (8, 16, or un-suffixed). */
+	switch (n) {
+	case 8:  dumb_resample_get_current_sample_8_2_2(resampler, volume_left, volume_right, dst); break;
+	case 16: dumb_resample_get_current_sample_16_2_2(resampler, volume_left, volume_right, dst); break;
+	default: dumb_resample_get_current_sample_2_2(resampler, volume_left, volume_right, dst); break;
+	}
+}
+
+
+
+void dumb_end_resampler_n(int n, DUMB_RESAMPLER *resampler)
+{
+	/* Hands the resampler to the end routine for the given source width (8, 16, or un-suffixed). */
+	switch (n) {
+	case 8:  dumb_end_resampler_8(resampler); break;
+	case 16: dumb_end_resampler_16(resampler); break;
+	default: dumb_end_resampler(resampler); break;
+	}
+}
diff --git a/libraries/dumb/src/helpers/resample.inc b/libraries/dumb/src/helpers/resample.inc
new file mode 100644
index 000000000..e5b8345d5
--- /dev/null
+++ b/libraries/dumb/src/helpers/resample.inc
@@ -0,0 +1,299 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * resample.inc - Resampling helper template.         / / \  \
+ *                                                   | <  /   \_
+ * By Bob and entheh.                                |  \/ /\   /
+ *                                                    \_  /  > /
+ * In order to find a good trade-off between            | \ / /
+ * speed and accuracy in this code, some tests          |  ' /
+ * were carried out regarding the behaviour of           \__/
+ * long long ints with gcc. The following code
+ * was tested:
+ *
+ * int a, b, c;
+ * c = ((long long)a * b) >> 16;
+ *
+ * DJGPP GCC Version 3.0.3 generated the following assembly language code for
+ * the multiplication and scaling, leaving the 32-bit result in EAX.
+ *
+ * movl  -8(%ebp), %eax    ; read one int into EAX
+ * imull -4(%ebp)          ; multiply by the other; result goes in EDX:EAX
+ * shrdl $16, %edx, %eax   ; shift EAX right 16, shifting bits in from EDX
+ *
+ * Note that a 32*32->64 multiplication is performed, allowing for high
+ * accuracy. On the Pentium 2 and above, shrdl takes two cycles (generally),
+ * so it is a minor concern when four multiplications are being performed
+ * (the cubic resampler). On the Pentium MMX and earlier, it takes four or
+ * more cycles, so this method is unsuitable for use in the low-quality
+ * resamplers.
+ *
+ * Since "long long" is a gcc-specific extension, we use LONG_LONG instead,
+ * defined in dumb.h. We may investigate later what code MSVC generates, but
+ * if it seems too slow then we suggest you use a good compiler.
+ *
+ * FIXME: these comments are somewhat out of date now.
+ */
+
+
+
+void dumb_reset_resampler(DUMB_RESAMPLER *resampler, SRCTYPE *src, int src_channels, int32 pos, int32 start, int32 end, int quality)
+{
+	/*
+	 * Re-initialises *resampler in place: stores src, pos, start and end,
+	 * zeroes subpos, sets dir to 1, clears pickup/pickup_data and resets
+	 * both fir_resampler entries. quality is clamped to the range
+	 * [0, DUMB_RQ_N_LEVELS - 1] before it is stored. Nothing is allocated.
+	 */
+	int n, clamped = quality;
+	if (clamped < 0) clamped = 0;
+	else if (clamped > DUMB_RQ_N_LEVELS - 1) clamped = DUMB_RQ_N_LEVELS - 1;
+
+	resampler->src = src;
+	resampler->pos = pos;
+	resampler->subpos = 0;
+	resampler->start = start;
+	resampler->end = end;
+	resampler->dir = 1;
+	resampler->pickup = NULL;
+	resampler->pickup_data = NULL;
+	resampler->quality = clamped;
+	resampler->overshot = -1;
+	resampler->fir_resampler_ratio = 0;
+	for (n = src_channels * 3; n-- > 0; ) resampler->X[n] = 0;
+
+	/* Both entries are cleared before either quality is applied, as resampler_clear() reads the old quality. */
+	resampler_clear(resampler->fir_resampler[0]);
+	resampler_clear(resampler->fir_resampler[1]);
+	resampler_set_quality(resampler->fir_resampler[0], resampler->quality - DUMB_RESAMPLER_BASE);
+	resampler_set_quality(resampler->fir_resampler[1], resampler->quality - DUMB_RESAMPLER_BASE);
+}
+
+
+
+DUMB_RESAMPLER *dumb_start_resampler(SRCTYPE *src, int src_channels, int32 pos, int32 start, int32 end, int quality)
+{
+	/* malloc + reset; NULL if allocation fails. NOTE(review): fir_resampler[] is not assigned before the reset call passes it to resampler_clear() -- confirm. */
+	DUMB_RESAMPLER *fresh = malloc(sizeof(DUMB_RESAMPLER));
+	if (fresh) dumb_reset_resampler(fresh, src, src_channels, pos, start, end, quality);
+	return fresh;
+}
+
+
+
+#define UPDATE_VOLUME( pvol, vol ) { /* one ramp step for vol; uses vol##r/d/t/m */ \
+	if (pvol) { /* a NULL pvol leaves vol untouched */              \
+		vol##r += vol##d;                                          \
+		if ((vol##d < 0 && vol##r <= vol##t) || /* target reached or passed, either direction */ \
+			(vol##d > 0 && vol##r >= vol##t)) {                    \
+			pvol->volume = pvol->target;                           \
+            if ( pvol->declick_stage == 0 || /* stages 1 and 2 are left as they are */ \
+                 pvol->declick_stage >= 3)                         \
+                 pvol->declick_stage++;                            \
+			pvol = NULL; /* stops further stepping by the caller */ \
+			vol = MULSCV( vol##t, vol##m );                        \
+		} else {                                                   \
+			vol = MULSCV( vol##r, vol##m ); /* still ramping */     \
+		}                                                          \
+	}                                                              \
+}
+
+
+
+/* Create mono source resampler. */
+#define SUFFIX2 _1
+#define SRC_CHANNELS 1
+#define DIVIDE_BY_SRC_CHANNELS(x) (int)(x)
+#define COPYSRC(dstarray, dstindex, srcarray, srcindex) (dstarray)[dstindex] = (srcarray)[srcindex]
+#define COPYSRC2(dstarray, dstindex, condition, srcarray, srcindex) (dstarray)[dstindex] = condition ? (srcarray)[srcindex] : 0
+#define MONO_DEST_VOLUME_PARAMETERS DUMB_VOLUME_RAMP_INFO * volume
+#define MONO_DEST_VOLUME_VARIABLES vol, volr, vold, volt, volm
+#define MONO_DEST_VOLUME_ZEROS 0, 0
+#define SET_MONO_DEST_VOLUME_VARIABLES { \
+	if ( volume ) { \
+		volr = xs_FloorToInt(volume->volume * 16777216.f); \
+		vold = xs_FloorToInt(volume->delta * 16777216.f); \
+		volt = xs_FloorToInt(volume->target * 16777216.f); \
+		volm = xs_FloorToInt(volume->mix * 16777216.f); \
+		vol = MULSCV( volr, volm ); \
+		if ( volr == volt ) volume = NULL; \
+	} else { \
+		vol = 0; \
+		vold = 0; \
+		volt = 0; \
+		volm = 0; \
+	} \
+}
+#define RETURN_MONO_DEST_VOLUME_VARIABLES if ( volume ) volume->volume = (float)volr / 16777216.0f
+#define MONO_DEST_VOLUMES_ARE_ZERO (vol == 0 && volt == 0)
+#define STEREO_DEST_MIX_ALIAS(op, upd, offset) { \
+	int xm = x[offset]; \
+	*dst++ op ALIAS(xm, lvol); \
+	*dst++ op ALIAS(xm, rvol); \
+	if ( upd ) UPDATE_VOLUME( volume_left, lvol ); \
+	if ( upd ) UPDATE_VOLUME( volume_right, rvol ); \
+}
+#define STEREO_DEST_MIX_LINEAR(op, upd, o0, o1) { \
+	int xm = LINEAR(x[o0], x[o1]); \
+	*dst++ op MULSC(xm, lvol); \
+	*dst++ op MULSC(xm, rvol); \
+	if ( upd ) UPDATE_VOLUME( volume_left, lvol ); \
+	if ( upd ) UPDATE_VOLUME( volume_right, rvol ); \
+}
+#define STEREO_DEST_MIX_CUBIC(op, upd, x0, x3, o0, o1, o2, o3) { \
+	int xm = CUBIC(x0[o0], x[o1], x[o2], x3[o3]); \
+	*dst++ op CUBICVOL(xm, lvol); \
+	*dst++ op CUBICVOL(xm, rvol); \
+	if ( upd ) UPDATE_VOLUME( volume_left, lvol ); \
+	if ( upd ) UPDATE_VOLUME( volume_right, rvol ); \
+}
+#define POKE_FIR(offset) { \
+        resampler_write_sample( resampler->fir_resampler[0], FIR(x[offset]) ); \
+}
+#define MONO_DEST_PEEK_FIR *dst = MULSC( resampler_get_sample( resampler->fir_resampler[0] ), vol )
+#define MONO_DEST_MIX_FIR { \
+        *dst++ += MULSC( resampler_get_sample( resampler->fir_resampler[0] ), vol ); \
+        UPDATE_VOLUME( volume, vol ); \
+}
+#define ADVANCE_FIR resampler_remove_sample( resampler->fir_resampler[0], 1 )
+#define STEREO_DEST_PEEK_FIR { \
+        int sample = resampler_get_sample( resampler->fir_resampler[0] ); \
+        *dst++ = MULSC( sample, lvol ); \
+        *dst++ = MULSC( sample, rvol ); \
+}
+#define STEREO_DEST_MIX_FIR { \
+        int sample = resampler_get_sample( resampler->fir_resampler[0] ); \
+        *dst++ += MULSC( sample, lvol ); \
+        *dst++ += MULSC( sample, rvol ); \
+        UPDATE_VOLUME( volume_left, lvol ); \
+        UPDATE_VOLUME( volume_right, rvol ); \
+}
+#include "resamp2.inc"
+
+/* Create stereo source resampler. */
+#define SUFFIX2 _2
+#define SRC_CHANNELS 2
+#define DIVIDE_BY_SRC_CHANNELS(x) (int)((x) >> 1)
+#define COPYSRC(dstarray, dstindex, srcarray, srcindex) { \
+	(dstarray)[(dstindex)*2] = (srcarray)[(srcindex)*2]; \
+	(dstarray)[(dstindex)*2+1] = (srcarray)[(srcindex)*2+1]; \
+}
+#define COPYSRC2(dstarray, dstindex, condition, srcarray, srcindex) { \
+	if (condition) { \
+		(dstarray)[(dstindex)*2] = (srcarray)[(srcindex)*2]; \
+		(dstarray)[(dstindex)*2+1] = (srcarray)[(srcindex)*2+1]; \
+	} else { \
+		(dstarray)[(dstindex)*2] = 0; \
+		(dstarray)[(dstindex)*2+1] = 0; \
+	} \
+}
+
+#define MONO_DEST_VOLUME_PARAMETERS DUMB_VOLUME_RAMP_INFO * volume_left, DUMB_VOLUME_RAMP_INFO * volume_right
+#define MONO_DEST_VOLUME_VARIABLES lvol, lvolr, lvold, lvolt, lvolm, rvol, rvolr, rvold, rvolt, rvolm
+#define MONO_DEST_VOLUME_ZEROS 0, 0
+#define SET_MONO_DEST_VOLUME_VARIABLES { \
+	if ( volume_left ) { \
+		lvolr = xs_FloorToInt(volume_left->volume * 16777216.f); \
+		lvold = xs_FloorToInt(volume_left->delta * 16777216.f); \
+		lvolt = xs_FloorToInt(volume_left->target * 16777216.f); \
+		lvolm = xs_FloorToInt(volume_left->mix * 16777216.f); \
+		lvol = MULSCV( lvolr, lvolm ); \
+		if ( lvolr == lvolt ) volume_left = NULL; \
+	} else { \
+		lvol = 0; \
+		lvold = 0; \
+		lvolt = 0; \
+		lvolm = 0; \
+	} \
+	if ( volume_right ) { \
+		rvolr = xs_FloorToInt(volume_right->volume * 16777216.f); \
+		rvold = xs_FloorToInt(volume_right->delta * 16777216.f); \
+		rvolt = xs_FloorToInt(volume_right->target * 16777216.f); \
+		rvolm = xs_FloorToInt(volume_right->mix * 16777216.f); \
+		rvol = MULSCV( rvolr, rvolm ); \
+		if ( rvolr == rvolt ) volume_right = NULL; \
+	} else { \
+		rvol = 0; \
+		rvold = 0; \
+		rvolt = 0; \
+		rvolm = 0; \
+	} \
+}
+#define RETURN_MONO_DEST_VOLUME_VARIABLES { \
+	if ( volume_left ) volume_left->volume = (float)lvolr / 16777216.0f; \
+	if ( volume_right ) volume_right->volume = (float)rvolr / 16777216.0f; \
+}
+#define MONO_DEST_VOLUMES_ARE_ZERO (lvol == 0 && lvolt == 0 && rvol == 0 && rvolt == 0)
+#define STEREO_DEST_MIX_ALIAS(op, upd, offset) { \
+	*dst++ op ALIAS(x[(offset)*2], lvol); \
+	*dst++ op ALIAS(x[(offset)*2+1], rvol); \
+	if ( upd ) UPDATE_VOLUME( volume_left, lvol ); \
+	if ( upd ) UPDATE_VOLUME( volume_right, rvol ); \
+}
+#define STEREO_DEST_MIX_LINEAR(op, upd, o0, o1) { \
+	*dst++ op MULSC(LINEAR(x[(o0)*2], x[(o1)*2]), lvol); \
+	*dst++ op MULSC(LINEAR(x[(o0)*2+1], x[(o1)*2+1]), rvol); \
+	if ( upd ) UPDATE_VOLUME( volume_left, lvol ); \
+	if ( upd ) UPDATE_VOLUME( volume_right, rvol ); \
+}
+#define STEREO_DEST_MIX_CUBIC(op, upd, x0, x3, o0, o1, o2, o3) { \
+	*dst++ op CUBICVOL(CUBIC(x0[(o0)*2], x[(o1)*2], x[(o2)*2], x3[(o3)*2]), lvol); \
+	*dst++ op CUBICVOL(CUBIC(x0[(o0)*2+1], x[(o1)*2+1], x[(o2)*2+1], x3[(o3)*2+1]), rvol); \
+	if ( upd ) UPDATE_VOLUME( volume_left, lvol ); \
+	if ( upd ) UPDATE_VOLUME( volume_right, rvol ); \
+}
+#define POKE_FIR(offset) { \
+        resampler_write_sample( resampler->fir_resampler[0], FIR(x[(offset)*2+0]) ); \
+        resampler_write_sample( resampler->fir_resampler[1], FIR(x[(offset)*2+1]) ); \
+}
+#define MONO_DEST_PEEK_FIR { \
+        *dst = MULSC( resampler_get_sample( resampler->fir_resampler[0] ), lvol ) + \
+                MULSC( resampler_get_sample( resampler->fir_resampler[1] ), rvol ); \
+}
+#define MONO_DEST_MIX_FIR { \
+        *dst++ += MULSC( resampler_get_sample( resampler->fir_resampler[0] ), lvol ) + \
+                MULSC( resampler_get_sample( resampler->fir_resampler[1] ), rvol ); \
+        UPDATE_VOLUME( volume_left, lvol ); \
+        UPDATE_VOLUME( volume_right, rvol ); \
+}
+#define ADVANCE_FIR { \
+        resampler_remove_sample( resampler->fir_resampler[0], 1 ); \
+        resampler_remove_sample( resampler->fir_resampler[1], 1 ); \
+}
+#define STEREO_DEST_PEEK_FIR { \
+        *dst++ = MULSC( resampler_get_sample( resampler->fir_resampler[0] ), lvol ); \
+        *dst++ = MULSC( resampler_get_sample( resampler->fir_resampler[1] ), rvol ); \
+}
+#define STEREO_DEST_MIX_FIR { \
+        *dst++ += MULSC( resampler_get_sample( resampler->fir_resampler[0] ), lvol ); \
+        *dst++ += MULSC( resampler_get_sample( resampler->fir_resampler[1] ), rvol ); \
+        UPDATE_VOLUME( volume_left, lvol ); \
+        UPDATE_VOLUME( volume_right, rvol ); \
+}
+#include "resamp2.inc"
+
+
+
+void dumb_end_resampler(DUMB_RESAMPLER *resampler)
+{
+	/* Releases the DUMB_RESAMPLER block itself; free() already ignores NULL. */
+	free(resampler);
+}
+
+
+
+#undef FIR
+#undef CUBICVOL
+#undef CUBIC
+#undef LINEAR
+#undef ALIAS
+#undef SRCBITS
+#undef SRCTYPE
+#undef SUFFIX
diff --git a/libraries/dumb/src/helpers/resampler.c b/libraries/dumb/src/helpers/resampler.c
new file mode 100644
index 000000000..d608c8cf6
--- /dev/null
+++ b/libraries/dumb/src/helpers/resampler.c
@@ -0,0 +1,1512 @@
+#include <stdlib.h>
+#include <string.h>
+#define _USE_MATH_DEFINES
+#include <math.h>
+#if (defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__amd64__))
+#include <xmmintrin.h>
+#define RESAMPLER_SSE
+#endif
+#ifdef __APPLE__
+#include <TargetConditionals.h>
+#if TARGET_CPU_ARM || TARGET_CPU_ARM64
+#include <arm_neon.h>
+#define RESAMPLER_NEON
+#endif
+#endif
+
+#ifdef _MSC_VER
+#define ALIGNED     _declspec(align(16))
+#else
+#define ALIGNED     __attribute__((aligned(16)))
+#endif
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+#include "internal/resampler.h"
+
+enum { RESAMPLER_SHIFT = 10 };
+enum { RESAMPLER_SHIFT_EXTRA = 8 };
+enum { RESAMPLER_RESOLUTION = 1 << RESAMPLER_SHIFT };
+enum { RESAMPLER_RESOLUTION_EXTRA = 1 << (RESAMPLER_SHIFT + RESAMPLER_SHIFT_EXTRA) };
+enum { SINC_WIDTH = 16 };
+enum { SINC_SAMPLES = RESAMPLER_RESOLUTION * SINC_WIDTH };
+enum { CUBIC_SAMPLES = RESAMPLER_RESOLUTION * 4 };
+
+typedef union bigint /* one 64-bit value, also addressable as two unsigned int halves */
+{
+	unsigned long long quad; /* the whole value */
+#ifndef __BIG_ENDIAN__
+	struct { unsigned int lo, hi; }; /* assumes 32-bit unsigned int, so lo/hi overlay quad -- TODO confirm */
+#else
+	struct { unsigned int hi, lo; }; /* member order swapped when __BIG_ENDIAN__ is defined */
+#endif
+} bigint;
+
+// What works well on 32-bit can make for extra work on 64-bit
+#if defined(_M_X64) || defined(__amd64__) || TARGET_CPU_ARM64
+#define CLEAR_HI(p)		(p.quad &= 0xffffffffu) /* keep only the low 32 bits */
+#define ADD_HI(a,p)		(a += p.quad >> 32) /* add the upper 32 bits to a */
+#define PHASE_REDUCE(p)	(int)(p.quad >> (32 - RESAMPLER_SHIFT)) /* top RESAMPLER_SHIFT bits of the low word once hi is zero */
+#else
+#define CLEAR_HI(p)		p.hi = 0 /* unparenthesised: use only as a full statement */
+#define ADD_HI(a,p)		a += p.hi /* unparenthesised: use only as a full statement */
+// Should be equivalent to (int)(p.quad >> (32 - RESAMPLER_SHIFT)),
+// since the high part should get zeroed after every sample.
+#define PHASE_REDUCE(p)	(p.lo >> (32 - RESAMPLER_SHIFT))
+#endif
+
+static const float RESAMPLER_BLEP_CUTOFF = 0.90f;
+static const float RESAMPLER_BLAM_CUTOFF = 0.93f;
+static const float RESAMPLER_SINC_CUTOFF = 0.999f;
+
+ALIGNED static float cubic_lut[CUBIC_SAMPLES];
+
+static float sinc_lut[SINC_SAMPLES + 1];
+static float window_lut[SINC_SAMPLES + 1];
+
+enum { resampler_buffer_size = SINC_WIDTH * 4 };
+
+static int fEqual(const double b, const double a)
+{
+    return (1.0e-6 > fabs(a - b)) ? 1 : 0; /* absolute tolerance of 1e-6; a NaN operand yields 0 */
+}
+
+static double sinc(double x)
+{
+    return !fEqual(x, 0.0) ? sin(x * M_PI) / (x * M_PI) : 1.0; /* sin(pi x)/(pi x); exactly 1.0 within 1e-6 of zero */
+}
+
+#ifdef RESAMPLER_SSE
+#ifdef _MSC_VER
+#include <intrin.h>
+#elif defined(__clang__) || defined(__GNUC__)
+static inline void /* GCC/Clang stand-in for the MSVC intrinsic of the same name */
+__cpuid(int *data, int selector) /* selector goes in via eax; data[0..3] receive eax, ebx, ecx, edx */
+{
+#if defined(__PIC__) && defined(__i386__) /* ebx is swapped out around cpuid; presumably because PIC reserves it */
+    asm("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi"
+        : "=a" (data[0]),
+        "=S" (data[1]), /* the ebx result is delivered through esi */
+        "=c" (data[2]),
+        "=d" (data[3])
+        : "0" (selector));
+#elif defined(__PIC__) && defined(__amd64__) /* same idea, using a compiler-chosen scratch register */
+    asm("xchg{q} {%%}rbx, %q1; cpuid; xchg{q} {%%}rbx, %q1"
+        : "=a" (data[0]),
+        "=&r" (data[1]), /* early-clobber scratch that ends up holding the rbx result */
+        "=c" (data[2]),
+        "=d" (data[3])
+        : "0" (selector));
+#else /* no PIC constraint: ebx can be named as an output directly */
+    asm("cpuid"
+        : "=a" (data[0]),
+        "=b" (data[1]),
+        "=c" (data[2]),
+        "=d" (data[3])
+        : "0" (selector));
+#endif
+}
+#else
+#define __cpuid(a,b) memset((a), 0, sizeof(int) * 4)
+#endif
+
+static int query_cpu_feature_sse() {
+    /* Runs __cpuid with selector 1 and reports bit 25 of the fourth output word: 1 if set, 0 if clear. */
+    int regs[4];
+    __cpuid(regs, 1);
+    return (regs[3] & (1 << 25)) != 0;
+}
+
+static int resampler_has_sse = 0;
+#endif
+
+void resampler_init(void) /* fills sinc_lut, window_lut and cubic_lut; records SSE availability when RESAMPLER_SSE is defined */
+{
+    unsigned i;
+    double dx = (float)(SINC_WIDTH) / SINC_SAMPLES, x = 0.0; /* table step: x runs from 0 to SINC_WIDTH */
+    for (i = 0; i < SINC_SAMPLES + 1; ++i, x += dx)
+    {
+        double y = x / SINC_WIDTH; /* x normalised to 0..1 for the window formula */
+#if 0
+        // Blackman
+        float window = 0.42659 - 0.49656 * cos(M_PI + M_PI * y) + 0.076849 * cos(2.0 * M_PI * y);
+#elif 1 /* the only branch that is compiled */
+        // Nuttall 3 term
+        double window = 0.40897 + 0.5 * cos(M_PI * y) + 0.09103 * cos(2.0 * M_PI * y);
+#elif 0
+        // C.R.Helmrich's 2 term window
+        float window = 0.79445 * cos(0.5 * M_PI * y) + 0.20555 * cos(1.5 * M_PI * y);
+#elif 0
+        // Lanczos
+        float window = sinc(y);
+#endif
+        sinc_lut[i] = (float)(fabs(x) < SINC_WIDTH ? sinc(x) : 0.0); /* forced to 0 once x reaches SINC_WIDTH */
+        window_lut[i] = (float)window;
+    }
+    dx = 1.0 / RESAMPLER_RESOLUTION; /* second table: x covers 0 up to, not including, 1 */
+    x = 0.0;
+    for (i = 0; i < RESAMPLER_RESOLUTION; ++i, x += dx)
+    {
+        cubic_lut[i*4]   = (float)(-0.5 * x * x * x +       x * x - 0.5 * x); /* four cubic weights stored per step */
+        cubic_lut[i*4+1] = (float)( 1.5 * x * x * x - 2.5 * x * x           + 1.0);
+        cubic_lut[i*4+2] = (float)(-1.5 * x * x * x + 2.0 * x * x + 0.5 * x);
+        cubic_lut[i*4+3] = (float)( 0.5 * x * x * x - 0.5 * x * x);
+    }
+#ifdef RESAMPLER_SSE
+    resampler_has_sse = query_cpu_feature_sse();
+#endif
+}
+
+typedef struct resampler /* state of one resampler instance; handed to callers as void * */
+{
+    int write_pos, write_filled; /* next input slot (wraps at resampler_buffer_size) and count of buffered input samples */
+    int read_pos, read_filled; /* output-side cursor and count; only reset by the functions in this part of the file */
+    bigint phase; /* input-side phase; its upper half is folded into the input pointer by the run loops */
+    bigint phase_inc; /* resampler_set_rate(): factor scaled by 2^32 */
+    bigint inv_phase; /* output-side phase used by the BLEP path */
+    bigint inv_phase_inc; /* resampler_set_rate(): 1 / factor scaled by 2^32 */
+    unsigned char quality; /* one of the RESAMPLER_QUALITY_* values */
+    signed char delay_added; /* -1 until the first write after a reset, then 0 */
+    signed char delay_removed; /* reset to -1 alongside delay_added */
+    double last_amp; /* previous input amplitude tracked by the BLEP path */
+    double accumulator; /* zeroed with the output state; its consumer is outside this part of the file */
+    float buffer_in[resampler_buffer_size * 2]; /* each sample is stored at i and at i + resampler_buffer_size */
+    float buffer_out[resampler_buffer_size + SINC_WIDTH * 2 - 1];
+} resampler;
+
+void * resampler_create(void)
+{
+    /* Allocates one resampler in its initial state (quality RESAMPLER_QUALITY_MAX, empty buffers); returns 0 if malloc fails. */
+    resampler * fresh = malloc( sizeof(*fresh) );
+    if ( fresh )
+    {
+        memset( fresh->buffer_in, 0, sizeof(fresh->buffer_in) );
+        memset( fresh->buffer_out, 0, sizeof(fresh->buffer_out) );
+        fresh->last_amp = fresh->accumulator = 0;
+        fresh->delay_removed = -1;
+        fresh->delay_added = -1;
+        fresh->quality = RESAMPLER_QUALITY_MAX;
+        fresh->inv_phase_inc.quad = 0;
+        fresh->inv_phase.quad = 0;
+        fresh->phase_inc.quad = 0;
+        fresh->phase.quad = 0;
+        fresh->read_filled = 0;
+        fresh->read_pos = 0;
+        fresh->write_filled = 0;
+        fresh->write_pos = SINC_WIDTH - 1; /* same starting slot that resampler_clear() restores */
+    }
+    return fresh;
+}
+
+void resampler_delete(void * _r)
+{
+    free( _r ); /* releases the single block obtained in resampler_create()/resampler_dup(); NULL is a no-op */
+}
+
+void * resampler_dup(const void * _r)
+{
+    /* Heap copy of the resampler at _r, filled in by resampler_dup_inplace(); returns 0 if malloc fails. */
+    resampler * copy = malloc( sizeof(*copy) );
+
+    if ( copy )
+        resampler_dup_inplace( copy, _r );
+    return copy;
+}
+
+void resampler_dup_inplace(void *_d, const void *_s)
+{
+    /*
+     * Overwrites the resampler at _d with the complete state of the one
+     * at _s:
+     *   - the buffer cursors (write_pos, write_filled, read_pos, read_filled)
+     *   - phase, phase_inc, inv_phase and inv_phase_inc
+     *   - quality and the delay_added / delay_removed markers
+     *   - last_amp and accumulator
+     *   - the buffer_in and buffer_out sample arrays
+     * Those are all of the members of struct resampler, so a single
+     * structure assignment carries the same data as copying them one
+     * at a time.
+     * _d must already point at storage large enough for a resampler;
+     * nothing is allocated or freed here.
+     */
+    const resampler * from = ( const resampler * ) _s;
+    resampler * to = ( resampler * ) _d;
+
+    *to = *from;
+}
+
+void resampler_set_quality(void *_r, int quality)
+{
+    /* Clamp, then store. A real change resets the delay markers, and also flushes the output state when BLEP/BLAM is the old or new mode. */
+    resampler * self = ( resampler * ) _r;
+    const int old_q = self->quality;
+    const int q = quality < RESAMPLER_QUALITY_MIN ? RESAMPLER_QUALITY_MIN :
+                  quality > RESAMPLER_QUALITY_MAX ? RESAMPLER_QUALITY_MAX : quality;
+    if ( old_q != q )
+    {
+        if ( q == RESAMPLER_QUALITY_BLEP || q == RESAMPLER_QUALITY_BLAM ||
+             old_q == RESAMPLER_QUALITY_BLEP || old_q == RESAMPLER_QUALITY_BLAM )
+        {
+            memset( self->buffer_out, 0, sizeof(self->buffer_out) );
+            self->accumulator = 0;
+            self->last_amp = 0;
+            self->read_filled = 0;
+            self->read_pos = 0;
+        }
+        self->delay_removed = -1;
+        self->delay_added = -1;
+    }
+    self->quality = (unsigned char)q;
+}
+
+int resampler_get_free_count(void *_r)
+{
+    const resampler * self = ( const resampler * ) _r;
+    return resampler_buffer_size - self->write_filled; /* input slots not yet occupied */
+}
+
+static int resampler_min_filled(resampler *r)
+{
+    /*
+     * Threshold that resampler_ready() compares write_filled against,
+     * per quality mode:
+     *   SINC          -> SINC_WIDTH * 2
+     *   CUBIC         -> 4
+     *   LINEAR, BLAM  -> 2
+     *   ZOH, BLEP and any unrecognised value -> 1
+     */
+    const int mode = r->quality;
+
+    if (mode == RESAMPLER_QUALITY_SINC)
+        return SINC_WIDTH * 2;
+    if (mode == RESAMPLER_QUALITY_CUBIC)
+        return 4;
+    if (mode == RESAMPLER_QUALITY_LINEAR || mode == RESAMPLER_QUALITY_BLAM)
+        return 2;
+    return 1;
+}
+
+static int resampler_input_delay(resampler *r)
+{
+    /*
+     * Value stored into write_filled by the first resampler_write_sample*()
+     * call after a reset, per quality mode:
+     *   SINC  -> SINC_WIDTH - 1
+     *   CUBIC -> 1
+     *   every other mode (including unrecognised values) -> 0
+     */
+    const int mode = r->quality;
+
+    if (mode == RESAMPLER_QUALITY_SINC)
+        return SINC_WIDTH - 1;
+    if (mode == RESAMPLER_QUALITY_CUBIC)
+        return 1;
+
+    return 0;
+}
+
+static int resampler_output_delay(resampler *r)
+{
+    /*
+     * Per-mode value for the output side:
+     *   BLEP, BLAM -> SINC_WIDTH - 1
+     *   every other mode (including unrecognised values) -> 0
+     * NOTE(review): no caller is visible in this part of the file.
+     */
+    const int mode = r->quality;
+    const int kernel_based = mode == RESAMPLER_QUALITY_BLEP ||
+                             mode == RESAMPLER_QUALITY_BLAM;
+
+    if (kernel_based)
+        return SINC_WIDTH - 1;
+    return 0;
+}
+
+int resampler_ready(void *_r)
+{
+    resampler * self = ( resampler * ) _r;
+    return resampler_min_filled(self) < self->write_filled; /* needs strictly more than the per-mode threshold */
+}
+
+void resampler_clear(void *_r)
+{
+    /* Rewinds cursors, phase and delay markers, and zeroes the first SINC_WIDTH - 1 slots in both halves of buffer_in. */
+    resampler * self = ( resampler * ) _r;
+    const size_t lead_bytes = (SINC_WIDTH - 1) * sizeof(self->buffer_in[0]);
+
+    self->write_pos = SINC_WIDTH - 1;
+    self->write_filled = self->read_pos = self->read_filled = 0;
+    self->phase.quad = 0;
+    self->delay_added = self->delay_removed = -1;
+    memset(self->buffer_in, 0, lead_bytes);
+    memset(self->buffer_in + resampler_buffer_size, 0, lead_bytes);
+    /* Only the BLEP/BLAM modes get their output-side state reset here. */
+    if (self->quality == RESAMPLER_QUALITY_BLEP || self->quality == RESAMPLER_QUALITY_BLAM)
+    {
+        self->inv_phase.quad = 0;
+        self->last_amp = self->accumulator = 0;
+        memset(self->buffer_out, 0, sizeof(self->buffer_out));
+    }
+}
+
+void resampler_set_rate(void *_r, double new_factor)
+{
+    resampler * self = ( resampler * ) _r; /* both steps below are stored scaled by 2^32 */
+    const double reciprocal = 1.0 / new_factor;
+    self->phase_inc.quad = (long long)(new_factor * 0x100000000ll);
+    self->inv_phase_inc.quad = (long long)(reciprocal * 0x100000000ll);
+}
+
+void resampler_write_sample(void *_r, short s)
+{
+    /* Appends one sample, scaled by 256, to both halves of buffer_in; the sample is silently dropped when the buffer is full. */
+    resampler * self = ( resampler * ) _r;
+    float scaled;
+
+    /* First write after a reset: start write_filled at the mode's input delay. */
+    if ( self->delay_added < 0 )
+    {
+        self->delay_added = 0;
+        self->write_filled = resampler_input_delay( self );
+    }
+
+    if ( self->write_filled >= resampler_buffer_size )
+        return;
+
+    scaled = s;
+    scaled *= 256.0;
+    self->buffer_in[ self->write_pos ] = scaled;
+    self->buffer_in[ self->write_pos + resampler_buffer_size ] = scaled;
+    self->write_pos = ( self->write_pos + 1 ) % resampler_buffer_size;
+    ++self->write_filled;
+}
+
+void resampler_write_sample_fixed(void *_r, int s, unsigned char depth)
+{
+    /* Appends one integer sample, divided by 2^(depth - 1), to both halves of buffer_in; dropped when the buffer is full. */
+    /* ldexp() replaces a "1 << (depth - 1)" divisor, which was undefined for depth 0 and for depth >= 32. */
+    resampler * r = ( resampler * ) _r;
+
+    if ( r->delay_added < 0 )
+    {
+        r->delay_added = 0;
+        r->write_filled = resampler_input_delay( r );
+    }
+
+    if ( r->write_filled < resampler_buffer_size )
+    {
+        const float scaled = (float)ldexp( (double)s, 1 - (int)depth ); /* exact; same result as the division for depth 1..31 */
+
+        r->buffer_in[ r->write_pos ] = scaled;
+        r->buffer_in[ r->write_pos + resampler_buffer_size ] = scaled;
+
+        ++r->write_filled;
+        r->write_pos = ( r->write_pos + 1 ) % resampler_buffer_size;
+    }
+}
+
+static int resampler_run_zoh(resampler * r, float ** out_, float * out_end)
+{
+    /* Zero-order hold: each output slot receives the current input sample; the input cursor then
+     * advances by the upper half of the accumulated phase. Returns the input samples consumed. */
+    float const* const base = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    const int usable = r->write_filled - 1; /* loop bound: one less than the buffered count */
+    float const* src = base;
+    float const* src_end;
+    float* dst;
+    bigint phase, step;
+    int used;
+    if ( usable <= 0 )
+        return 0;
+    src_end = base + usable;
+    dst = *out_;
+    phase = r->phase;
+    step = r->phase_inc;
+
+    for (;;)
+    {
+        if ( dst >= out_end )
+            break;
+
+        *dst++ = *src;
+
+        phase.quad += step.quad;
+        ADD_HI(src, phase);
+        CLEAR_HI(phase);
+
+        if ( src >= src_end )
+            break;
+    }
+    r->phase = phase;
+    *out_ = dst;
+    used = (int)(src - base);
+    r->write_filled -= used;
+    return used;
+}
+
+#ifndef RESAMPLER_NEON
+static int resampler_run_blep(resampler * r, float ** out_, float * out_end)
+{ /* BLEP path: mixes each change in input amplitude into the output as a windowed-sinc kernel; returns input samples consumed */
+    int in_size = r->write_filled;
+    float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    int used = 0;
+    in_size -= 1;
+    if ( in_size > 0 )
+    {
+        float* out = *out_;
+        float const* in = in_;
+        float const* const in_end = in + in_size;
+        double last_amp = r->last_amp;
+        bigint inv_phase = r->inv_phase;
+        bigint inv_phase_inc = r->inv_phase_inc;
+
+        const int step = (int)(RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION);
+        const int window_step = RESAMPLER_RESOLUTION;
+        /* step spaces the sinc_lut lookups, window_step the window_lut lookups. */
+        do
+        {
+            double sample;
+            /* Stop unless a full kernel of SINC_WIDTH * 2 taps fits before out_end. */
+            if ( out + SINC_WIDTH * 2 > out_end )
+                break;
+            /* Only the difference from the previous amplitude is injected. */
+            sample = *in++ - last_amp;
+            
+            if (sample)
+            {
+                double kernel[SINC_WIDTH * 2], kernel_sum = 0.0f;
+                int phase_reduced = PHASE_REDUCE(inv_phase);
+                int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+                int i = SINC_WIDTH;
+
+                for (; i >= -SINC_WIDTH + 1; --i) /* i = SINC_WIDTH .. -SINC_WIDTH + 1 maps to kernel[SINC_WIDTH * 2 - 1 .. 0] */
+                {
+                    int pos = i * step;
+                    int window_pos = i * window_step;
+                    kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+                }
+                last_amp += sample;
+                sample /= kernel_sum; /* normalise so the taps add up to the step size */
+                for (i = 0; i < SINC_WIDTH * 2; ++i)
+                    out[i] += (float)(sample * kernel[i]);
+            }
+            /* Advance the output pointer by the upper half of the accumulated inverse phase. */
+            inv_phase.quad += inv_phase_inc.quad;
+            
+            ADD_HI(out, inv_phase);
+            
+            CLEAR_HI(inv_phase);
+        }
+        while ( in < in_end );
+        /* Write the working copies back to the instance. */
+        r->inv_phase = inv_phase;
+        r->last_amp = last_amp;
+        *out_ = out;
+        
+        used = (int)(in - in_);
+        
+        r->write_filled -= used;
+    }
+    
+    return used;
+}
+#endif
+
+#ifdef RESAMPLER_SSE
+static int resampler_run_blep_sse(resampler * r, float ** out_, float * out_end)
+{   /* BLEP (SSE): for every change in input level, mixes a float windowed-sinc kernel into the output four lanes at a time. */
+    int in_size = r->write_filled;
+    float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    int used = 0; /* input samples consumed; this is also the return value */
+    in_size -= 1;
+    if ( in_size > 0 )
+    {
+        float* out = *out_;
+        float const* in = in_;
+        float const* const in_end = in + in_size;
+        double last_amp = r->last_amp;
+        bigint inv_phase = r->inv_phase;
+        bigint inv_phase_inc = r->inv_phase_inc;
+        /* strides used to index sinc_lut (cutoff-scaled) and window_lut (full resolution) */
+        const int step = (int)(RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION);
+        const int window_step = RESAMPLER_RESOLUTION;
+        /* each pass consumes exactly one input sample */
+        do
+        {
+            double sample;
+            /* stop unless a full kernel of SINC_WIDTH * 2 floats fits before out_end */
+            if ( out + SINC_WIDTH * 2 > out_end )
+                break;
+            /* work on the change relative to the level emitted so far */
+            sample = *in++ - last_amp;
+            /* an unchanged level contributes nothing, so the kernel is skipped */
+            if (sample)
+            {
+                float kernel_sum = 0.0f;
+                __m128 kernel[SINC_WIDTH / 2];
+                __m128 temp1, temp2;
+                __m128 samplex;
+                float *kernelf = (float*)(&kernel); /* scalar view of the same SINC_WIDTH * 2 floats */
+                int phase_reduced = PHASE_REDUCE(inv_phase);
+                int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+                int i = SINC_WIDTH;
+                /* fill the kernel through its float view, last slot first, summing the taps for normalisation */
+                for (; i >= -SINC_WIDTH + 1; --i)
+                {
+                    int pos = i * step;
+                    int window_pos = i * window_step;
+                    kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+                }
+                last_amp += sample;
+                sample /= kernel_sum;
+				samplex = _mm_set1_ps( (float)sample );
+                for (i = 0; i < SINC_WIDTH / 2; ++i)
+                {
+                    temp1 = _mm_load_ps( (const float *)( kernel + i ) ); /* aligned load of the local kernel */
+                    temp1 = _mm_mul_ps( temp1, samplex );
+                    temp2 = _mm_loadu_ps( (const float *) out + i * 4 ); /* out is accessed unaligned */
+                    temp1 = _mm_add_ps( temp1, temp2 );
+                    _mm_storeu_ps( (float *) out + i * 4, temp1 );
+                }
+            }
+            /* step the inverse phase; ADD_HI/CLEAR_HI presumably move out by its high part and keep the rest */
+            inv_phase.quad += inv_phase_inc.quad;
+            
+            ADD_HI(out, inv_phase);
+            
+            CLEAR_HI(inv_phase);
+        }
+        while ( in < in_end );
+        /* write the loop state back so the next call resumes from here */
+        r->inv_phase = inv_phase;
+        r->last_amp = last_amp;
+        *out_ = out;
+        
+        used = (int)(in - in_);
+        
+        r->write_filled -= used;
+    }
+    
+    return used;
+}
+#endif
+
+#ifdef RESAMPLER_NEON
+static int resampler_run_blep(resampler * r, float ** out_, float * out_end)
+{   /* BLEP (NEON): for every change in input level, multiply-accumulates a float windowed-sinc kernel into the output. */
+    int in_size = r->write_filled;
+    float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    int used = 0; /* input samples consumed; this is also the return value */
+    in_size -= 1;
+    if ( in_size > 0 )
+    {
+        float* out = *out_;
+        float const* in = in_;
+        float const* const in_end = in + in_size;
+        float last_amp = r->last_amp;
+        bigint inv_phase = r->inv_phase;
+        bigint inv_phase_inc = r->inv_phase_inc;
+        /* strides used to index sinc_lut (cutoff-scaled, converted to int) and window_lut (full resolution) */
+        const int step = RESAMPLER_BLEP_CUTOFF * RESAMPLER_RESOLUTION;
+        const int window_step = RESAMPLER_RESOLUTION;
+        /* each pass consumes exactly one input sample */
+        do
+        {
+            float sample;
+            /* stop unless a full kernel of SINC_WIDTH * 2 floats fits before out_end */
+            if ( out + SINC_WIDTH * 2 > out_end )
+                break;
+            /* work on the change relative to the level emitted so far */
+            sample = *in++ - last_amp;
+            /* an unchanged level contributes nothing, so the kernel is skipped */
+            if (sample)
+            {
+                float kernel_sum = 0.0f;
+                float32x4_t kernel[SINC_WIDTH / 2];
+                float32x4_t temp1, temp2;
+                float32x4_t samplex;
+                float *kernelf = (float*)(&kernel); /* scalar view of the same SINC_WIDTH * 2 floats */
+                int phase_reduced = PHASE_REDUCE(inv_phase);
+                int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+                int i = SINC_WIDTH;
+                /* fill the kernel through its float view, last slot first, summing the taps for normalisation */
+                for (; i >= -SINC_WIDTH + 1; --i)
+                {
+                    int pos = i * step;
+                    int window_pos = i * window_step;
+                    kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+                }
+                last_amp += sample;
+                sample /= kernel_sum;
+                samplex = vdupq_n_f32(sample);
+                for (i = 0; i < SINC_WIDTH / 2; ++i)
+                {
+                    temp1 = vld1q_f32( (const float32_t *)( kernel + i ) );
+                    temp2 = vld1q_f32( (const float32_t *) out + i * 4 );
+                    temp2 = vmlaq_f32( temp2, temp1, samplex ); /* out += kernel * normalised delta */
+                    vst1q_f32( (float32_t *) out + i * 4, temp2 );
+                }
+            }
+            /* step the inverse phase; ADD_HI/CLEAR_HI presumably move out by its high part and keep the rest */
+            inv_phase.quad += inv_phase_inc.quad;
+            
+            ADD_HI(out, inv_phase);
+            
+            CLEAR_HI(inv_phase);
+        }
+        while ( in < in_end );
+        /* write the loop state back so the next call resumes from here */
+        r->inv_phase = inv_phase;
+        r->last_amp = last_amp;
+        *out_ = out;
+        
+        used = (int)(in - in_);
+        
+        r->write_filled -= used;
+    }
+    
+    return used;
+}
+#endif
+
+static int resampler_run_linear(resampler * r, float ** out_, float * out_end)
+{
+    /* Linear interpolation between neighbouring input samples, weighted by
+     * the low word of the phase scaled by 1 / 2^32. */
+    float const* const first = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    int const avail = r->write_filled - 2;   /* each output reads src[0] and src[1] */
+    float const* src = first;
+    float const* src_end;
+    float* dst;
+    bigint pos, inc;
+    int used;
+
+    if ( avail <= 0 )
+        return 0;
+
+    src_end = first + avail;
+    dst = *out_;
+    pos = r->phase;
+    inc = r->phase_inc;
+
+    /* src < src_end is guaranteed on entry, so this pre-tested loop visits
+     * exactly the same iterations as a do/while with an output check. */
+    while ( src < src_end && dst < out_end )
+    {
+        *dst++ = (float)(src[0] + (src[1] - src[0]) * pos.lo * (1.f / 0x100000000ll));
+        /* advance the phase; ADD_HI presumably carries its high part into src */
+        pos.quad += inc.quad;
+        ADD_HI(src, pos);
+        CLEAR_HI(pos);
+    }
+
+    r->phase = pos;
+    *out_ = dst;
+
+    used = (int)(src - first);
+    r->write_filled -= used;
+
+    return used;
+}
+
+#ifndef RESAMPLER_NEON
+static int resampler_run_blam(resampler * r, float ** out_, float * out_end)
+{   /* BLAM (portable C): mixes a windowed-sinc kernel for each level change; the level is interpolated when phase_inc is below 2^32. */
+    int in_size = r->write_filled;
+    float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    int used = 0; /* input samples consumed; this is also the return value */
+    in_size -= 2; /* in[1] may be read below, so two samples stay in reserve */
+    if ( in_size > 0 )
+    {
+        float* out = *out_;
+        float const* in = in_;
+        float const* const in_end = in + in_size;
+        double last_amp = r->last_amp;
+        bigint phase = r->phase;
+        bigint phase_inc = r->phase_inc;
+        bigint inv_phase = r->inv_phase;
+        bigint inv_phase_inc = r->inv_phase_inc;
+        /* strides used to index sinc_lut (cutoff-scaled) and window_lut (full resolution) */
+        const int step = (int)(RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION);
+        const int window_step = RESAMPLER_RESOLUTION;
+        /* each pass handles one input sample or one output slot, depending on inv_phase_inc */
+        do
+        {
+            double sample;
+            /* stop unless a full kernel of SINC_WIDTH * 2 floats fits before out_end */
+            if ( out + SINC_WIDTH * 2 > out_end )
+                break;
+            /* target level: in[0], moved towards in[1] by phase / 2^32 when phase_inc < 2^32 */
+            sample = in[0];
+            if (phase_inc.quad < 0x100000000ll)
+                sample += (in[1] - in[0]) * phase.quad * (1.f / 0x100000000ll);
+            sample -= last_amp;
+            /* an unchanged level contributes nothing, so the kernel is skipped */
+            if (sample)
+            {
+                double kernel[SINC_WIDTH * 2], kernel_sum = 0.0f;
+                int phase_reduced = PHASE_REDUCE(inv_phase);
+                int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+                int i = SINC_WIDTH;
+                /* fill kernel[] from its last slot down to slot 0, summing the taps for normalisation */
+                for (; i >= -SINC_WIDTH + 1; --i)
+                {
+                    int pos = i * step;
+                    int window_pos = i * window_step;
+                    kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+                }
+                last_amp += sample;
+                sample /= kernel_sum;
+                for (i = 0; i < SINC_WIDTH * 2; ++i)
+                    out[i] += (float)(sample * kernel[i]);
+            }
+            /* inv_phase_inc < 2^32: take one input and let inv_phase move out; otherwise emit one slot and let phase move in */
+            if (inv_phase_inc.quad < 0x100000000ll)
+            {
+                ++in;
+                inv_phase.quad += inv_phase_inc.quad;
+                ADD_HI(out, inv_phase);
+                CLEAR_HI(inv_phase);
+            }
+            else
+            {
+                phase.quad += phase_inc.quad;
+                ++out;
+                ADD_HI(in, phase);
+                CLEAR_HI(phase);
+            }
+        }
+        while ( in < in_end );
+        /* write the loop state back so the next call resumes from here */
+        r->phase = phase;
+        r->inv_phase = inv_phase;
+        r->last_amp = last_amp;
+        *out_ = out;
+        
+        used = (int)(in - in_);
+        
+        r->write_filled -= used;
+    }
+    
+    return used;
+}
+#endif
+
+#ifdef RESAMPLER_SSE
+static int resampler_run_blam_sse(resampler * r, float ** out_, float * out_end)
+{   /* BLAM (SSE): mixes a float windowed-sinc kernel for each level change; the level is interpolated when phase_inc is below 2^32. */
+    int in_size = r->write_filled;
+    float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    int used = 0; /* input samples consumed; this is also the return value */
+    in_size -= 2; /* in[1] may be read below, so two samples stay in reserve */
+    if ( in_size > 0 )
+    {
+        float* out = *out_;
+        float const* in = in_;
+        float const* const in_end = in + in_size;
+        double last_amp = r->last_amp;
+        bigint phase = r->phase;
+        bigint phase_inc = r->phase_inc;
+        bigint inv_phase = r->inv_phase;
+        bigint inv_phase_inc = r->inv_phase_inc;
+        /* strides used to index sinc_lut (cutoff-scaled) and window_lut (full resolution) */
+        const int step = (int)(RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION);
+        const int window_step = RESAMPLER_RESOLUTION;
+        /* each pass handles one input sample or one output slot, depending on inv_phase_inc */
+        do
+        {
+            double sample;
+            /* stop unless a full kernel of SINC_WIDTH * 2 floats fits before out_end */
+            if ( out + SINC_WIDTH * 2 > out_end )
+                break;
+            /* target level: in[0], moved towards in[1] by phase / 2^32 when phase_inc < 2^32 */
+            sample = in[0];
+            if (phase_inc.quad < 0x100000000ll)
+            {
+                sample += (in[1] - in[0]) * phase.quad * (1.f / 0x100000000ll);
+            }
+            sample -= last_amp;
+            /* an unchanged level contributes nothing, so the kernel is skipped */
+            if (sample)
+            {
+                float kernel_sum = 0.0f;
+                __m128 kernel[SINC_WIDTH / 2];
+                __m128 temp1, temp2;
+                __m128 samplex;
+                float *kernelf = (float*)(&kernel); /* scalar view of the same SINC_WIDTH * 2 floats */
+				int phase_reduced = PHASE_REDUCE(inv_phase);
+                int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+                int i = SINC_WIDTH;
+                /* fill the kernel through its float view, last slot first, summing the taps for normalisation */
+                for (; i >= -SINC_WIDTH + 1; --i)
+                {
+                    int pos = i * step;
+                    int window_pos = i * window_step;
+                    kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+                }
+                last_amp += sample;
+                sample /= kernel_sum;
+                samplex = _mm_set1_ps( (float)sample );
+                for (i = 0; i < SINC_WIDTH / 2; ++i)
+                {
+                    temp1 = _mm_load_ps( (const float *)( kernel + i ) ); /* aligned load of the local kernel */
+                    temp1 = _mm_mul_ps( temp1, samplex );
+                    temp2 = _mm_loadu_ps( (const float *) out + i * 4 ); /* out is accessed unaligned */
+                    temp1 = _mm_add_ps( temp1, temp2 );
+                    _mm_storeu_ps( (float *) out + i * 4, temp1 );
+                }
+            }
+            /* inv_phase_inc < 2^32: take one input and let inv_phase move out; otherwise emit one slot */
+            if (inv_phase_inc.quad < 0x100000000ll)
+            {
+                ++in;
+                inv_phase.quad += inv_phase_inc.quad;
+                ADD_HI(out, inv_phase);
+                CLEAR_HI(inv_phase);
+            }
+            else
+            {
+                phase.quad += phase_inc.quad;
+                ++out;
+                /* phase reached 2^32: step to the next input (a single step, unlike ADD_HI) */
+                if (phase.quad >= 0x100000000ll)
+                {
+                    ++in;
+                    CLEAR_HI(phase);
+                }
+            }
+        }
+        while ( in < in_end );
+        /* write the loop state back so the next call resumes from here */
+        r->phase = phase;
+        r->inv_phase = inv_phase;
+        r->last_amp = last_amp;
+        *out_ = out;
+        
+        used = (int)(in - in_);
+        
+        r->write_filled -= used;
+    }
+    
+    return used;
+}
+#endif
+
+#ifdef RESAMPLER_NEON
+static int resampler_run_blam(resampler * r, float ** out_, float * out_end)
+{   /* BLAM (NEON): multiply-accumulates a float windowed-sinc kernel for each level change; the level is interpolated when phase_inc is below 2^32. */
+    int in_size = r->write_filled;
+    float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    int used = 0; /* input samples consumed; this is also the return value */
+    in_size -= 2; /* in[1] may be read below, so two samples stay in reserve */
+    if ( in_size > 0 )
+    {
+        float* out = *out_;
+        float const* in = in_;
+        float const* const in_end = in + in_size;
+        float last_amp = r->last_amp;
+        bigint phase = r->phase;
+        bigint phase_inc = r->phase_inc;
+        bigint inv_phase = r->inv_phase;
+        bigint inv_phase_inc = r->inv_phase_inc;
+        /* strides used to index sinc_lut (cutoff-scaled, converted to int) and window_lut (full resolution) */
+        const int step = RESAMPLER_BLAM_CUTOFF * RESAMPLER_RESOLUTION;
+        const int window_step = RESAMPLER_RESOLUTION;
+        /* each pass handles one input sample or one output slot, depending on inv_phase_inc */
+        do
+        {
+            float sample;
+            /* stop unless a full kernel of SINC_WIDTH * 2 floats fits before out_end */
+            if ( out + SINC_WIDTH * 2 > out_end )
+                break;
+            /* target level: in[0], moved towards in[1] by phase / 2^32 when phase_inc < 2^32 */
+            sample = in[0];
+            if (phase_inc.quad < 0x100000000ll)
+                sample += (in[1] - in[0]) * phase.quad * (1.f / 0x100000000ll); /* phase is a bigint: scale .quad, as the C/SSE paths do */
+            sample -= last_amp;
+            /* an unchanged level contributes nothing, so the kernel is skipped */
+            if (sample)
+            {
+                float kernel_sum = 0.0;
+                float32x4_t kernel[SINC_WIDTH / 2];
+                float32x4_t temp1, temp2;
+                float32x4_t samplex;
+                float *kernelf = (float*)(&kernel); /* scalar view of the same SINC_WIDTH * 2 floats */
+                int phase_reduced = PHASE_REDUCE(inv_phase);
+                int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+                int i = SINC_WIDTH;
+                /* fill the kernel through its float view, last slot first, summing the taps for normalisation */
+                for (; i >= -SINC_WIDTH + 1; --i)
+                {
+                    int pos = i * step;
+                    int window_pos = i * window_step;
+                    kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+                }
+                last_amp += sample;
+                sample /= kernel_sum;
+                samplex = vdupq_n_f32(sample);
+                for (i = 0; i < SINC_WIDTH / 2; ++i)
+                {
+                    temp1 = vld1q_f32( (const float32_t *)( kernel + i ) );
+                    temp2 = vld1q_f32( (const float32_t *) out + i * 4 );
+                    temp2 = vmlaq_f32( temp2, temp1, samplex ); /* out += kernel * normalised delta */
+                    vst1q_f32( (float32_t *) out + i * 4, temp2 );
+                }
+            }
+            /* inv_phase_inc < 2^32: take one input and let inv_phase move out; otherwise emit one slot */
+            if (inv_phase_inc.quad < 0x100000000ll)
+            {
+                ++in;
+                inv_phase.quad += inv_phase_inc.quad;
+                ADD_HI(out, inv_phase);
+                CLEAR_HI(inv_phase);
+            }
+            else
+            {
+                phase.quad += phase_inc.quad;
+                ++out;
+                /* phase reached 2^32: step to the next input (a single step, unlike ADD_HI) */
+                if (phase.quad >= 0x100000000ll)
+                {
+                    ++in;
+                    CLEAR_HI(phase);
+                }
+            }
+        }
+        while ( in < in_end );
+        /* write the loop state back so the next call resumes from here */
+        r->phase = phase;
+        r->inv_phase = inv_phase;
+        r->last_amp = last_amp;
+        *out_ = out;
+        
+        used = (int)(in - in_);
+        
+        r->write_filled -= used;
+    }
+    
+    return used;
+}
+#endif
+
+#ifndef RESAMPLER_NEON
+static int resampler_run_cubic(resampler * r, float ** out_, float * out_end)
+{
+    /* Four-tap cubic interpolation: every output sample is the dot product
+     * of four consecutive inputs with the cubic_lut row selected by the
+     * reduced phase. */
+    float const* const first = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    int const avail = r->write_filled - 4;   /* each output reads src[0..3] */
+    float const* src = first;
+    float const* src_end;
+    float* dst;
+    bigint pos, inc;
+    int used;
+
+    if ( avail <= 0 )
+        return 0;
+
+    src_end = first + avail;
+    dst = *out_;
+    pos = r->phase;
+    inc = r->phase_inc;
+
+    /* src < src_end is guaranteed on entry, so this pre-tested loop visits
+     * exactly the same iterations as a do/while with an output check. */
+    while ( src < src_end && dst < out_end )
+    {
+        float * taps = cubic_lut + PHASE_REDUCE(pos) * 4;
+        float acc = 0;
+        int tap;
+
+        /* sum taps 0..3 in order, starting from zero */
+        for ( tap = 0; tap < 4; ++tap )
+            acc += src[tap] * taps[tap];
+        *dst++ = acc;
+        /* advance the phase; ADD_HI presumably carries its high part into src */
+        pos.quad += inc.quad;
+        ADD_HI(src, pos);
+        CLEAR_HI(pos);
+    }
+
+    r->phase = pos;
+    *out_ = dst;
+
+    used = (int)(src - first);
+    r->write_filled -= used;
+
+    return used;
+}
+#endif
+
+#ifdef RESAMPLER_SSE
+static int resampler_run_cubic_sse(resampler * r, float ** out_, float * out_end)
+{   /* Cubic (SSE): each output is the dot product of four inputs with the cubic_lut row selected by the reduced phase. */
+    int in_size = r->write_filled;
+    float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    int used = 0; /* input samples consumed; this is also the return value */
+    in_size -= 4; /* four inputs are loaded per output */
+    if ( in_size > 0 )
+    {
+        float* out = *out_;
+        float const* in = in_;
+        float const* const in_end = in + in_size;
+        bigint phase = r->phase;
+        bigint phase_inc = r->phase_inc;
+        /* one output sample per pass */
+        do
+        {
+            __m128 temp1, temp2;
+            __m128 samplex = _mm_setzero_ps();
+            /* stop when the output window is full */
+            if ( out >= out_end )
+                break;
+            /* multiply four inputs by four taps, then reduce the products into lane 0 */
+            temp1 = _mm_loadu_ps( (const float *)( in ) );
+            temp2 = _mm_load_ps( (const float *)( cubic_lut + PHASE_REDUCE(phase) * 4 ) ); /* aligned load: assumes 16-byte aligned cubic_lut rows - TODO confirm */
+            temp1 = _mm_mul_ps( temp1, temp2 );
+            samplex = _mm_add_ps( samplex, temp1 );
+            temp1 = _mm_movehl_ps( temp1, samplex ); /* upper two lanes moved down */
+            samplex = _mm_add_ps( samplex, temp1 );
+            temp1 = samplex;
+            temp1 = _mm_shuffle_ps( temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1) ); /* lane 1 moved to lane 0 */
+            samplex = _mm_add_ps( samplex, temp1 );
+            _mm_store_ss( out, samplex );
+            ++out;
+            /* advance the phase; ADD_HI/CLEAR_HI presumably move in by its high part and keep the rest */
+            phase.quad += phase_inc.quad;
+            
+            ADD_HI(in, phase);
+            
+            CLEAR_HI(phase);
+        }
+        while ( in < in_end );
+        /* write the loop state back so the next call resumes from here */
+        r->phase = phase;
+        *out_ = out;
+        
+        used = (int)(in - in_);
+        
+        r->write_filled -= used;
+    }
+    
+    return used;
+}
+#endif
+
+#ifdef RESAMPLER_NEON
+static int resampler_run_cubic(resampler * r, float ** out_, float * out_end)
+{   /* Cubic (NEON): each output is the dot product of four inputs with the cubic_lut row selected by the reduced phase. */
+    int in_size = r->write_filled;
+    float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    int used = 0; /* input samples consumed; this is also the return value */
+    in_size -= 4; /* four inputs are loaded per output */
+    if ( in_size > 0 )
+    {
+        float* out = *out_;
+        float const* in = in_;
+        float const* const in_end = in + in_size;
+        bigint phase = r->phase;
+        bigint phase_inc = r->phase_inc;
+        /* one output sample per pass */
+        do
+        {
+            float32x4_t temp1, temp2;
+            float32x2_t half;
+            /* stop when the output window is full */
+            if ( out >= out_end )
+                break;
+            /* multiply four inputs by four taps, then add the halves and the remaining pair */
+            temp1 = vld1q_f32( (const float32_t *)( in ) );
+            temp2 = vld1q_f32( (const float32_t *)( cubic_lut + PHASE_REDUCE(phase) * 4 ) );
+            temp1 = vmulq_f32( temp1, temp2 );
+            half = vadd_f32(vget_high_f32(temp1), vget_low_f32(temp1));
+            *out++ = vget_lane_f32(vpadd_f32(half, half), 0);
+            /* advance the phase; ADD_HI/CLEAR_HI presumably move in by its high part and keep the rest */
+            phase.quad += phase_inc.quad;
+            
+            ADD_HI(in, phase); /* terminated like every other ADD_HI call site in this file */
+            
+            CLEAR_HI(phase);
+        }
+        while ( in < in_end );
+        /* write the loop state back so the next call resumes from here */
+        r->phase = phase;
+        *out_ = out;
+        
+        used = (int)(in - in_);
+        
+        r->write_filled -= used;
+    }
+    
+    return used;
+}
+#endif
+
+#ifndef RESAMPLER_NEON
+static int resampler_run_sinc(resampler * r, float ** out_, float * out_end)
+{   /* Sinc (portable C): each output is SINC_WIDTH * 2 inputs weighted by a windowed-sinc kernel rebuilt for the current phase. */
+    int in_size = r->write_filled;
+    float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    int used = 0; /* input samples consumed; this is also the return value */
+    in_size -= SINC_WIDTH * 2; /* a full kernel's worth of inputs is read per output */
+    if ( in_size > 0 )
+    {
+        float* out = *out_;
+        float const* in = in_;
+        float const* const in_end = in + in_size;
+        bigint phase = r->phase;
+        bigint phase_inc = r->phase_inc;
+        /* sinc_lut stride: when phase_inc exceeds 2^32 it is divided by phase_inc / 2^32 */
+        int step = phase_inc.quad > 0x100000000ll ?
+			(int)(RESAMPLER_RESOLUTION / (phase_inc.quad * (1.f / 0x100000000ll)) * RESAMPLER_SINC_CUTOFF) :
+			(int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF);
+        int window_step = RESAMPLER_RESOLUTION;
+        /* one output sample per pass */
+        do
+        {
+            double kernel[SINC_WIDTH * 2], kernel_sum = 0.0;
+            int i = SINC_WIDTH;
+            int phase_reduced = PHASE_REDUCE(phase);
+            int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+            float sample;
+            /* stop when the output window is full */
+            if ( out >= out_end )
+                break;
+            /* fill kernel[] from its last slot down to slot 0, summing the taps for normalisation */
+            for (; i >= -SINC_WIDTH + 1; --i)
+            {
+                int pos = i * step;
+                int window_pos = i * window_step;
+                kernel_sum += kernel[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+            }
+            for (sample = 0, i = 0; i < SINC_WIDTH * 2; ++i)
+                sample += (float)(in[i] * kernel[i]);
+            *out++ = (float)(sample / kernel_sum);
+            /* advance the phase; ADD_HI/CLEAR_HI presumably move in by its high part and keep the rest */
+            phase.quad += phase_inc.quad;
+
+            ADD_HI(in, phase);
+
+            CLEAR_HI(phase);
+        }
+        while ( in < in_end );
+        /* write the loop state back so the next call resumes from here */
+        r->phase = phase;
+        *out_ = out;
+
+        used = (int)(in - in_);
+
+        r->write_filled -= used;
+    }
+
+    return used;
+}
+#endif
+
+#ifdef RESAMPLER_SSE
+static int resampler_run_sinc_sse(resampler * r, float ** out_, float * out_end)
+{   /* Sinc (SSE): each output is SINC_WIDTH * 2 inputs weighted by a float windowed-sinc kernel rebuilt for the current phase. */
+    int in_size = r->write_filled;
+    float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    int used = 0; /* input samples consumed; this is also the return value */
+    in_size -= SINC_WIDTH * 2; /* a full kernel's worth of inputs is read per output */
+    if ( in_size > 0 )
+    {
+        float* out = *out_;
+        float const* in = in_;
+        float const* const in_end = in + in_size;
+        bigint phase = r->phase;
+        bigint phase_inc = r->phase_inc;
+        /* sinc_lut stride: when phase_inc exceeds 2^32 it is divided by phase_inc / 2^32 */
+        int step = phase_inc.quad > 0x100000000ll ?
+			(int)(RESAMPLER_RESOLUTION / (phase_inc.quad * (1.f / 0x100000000ll)) * RESAMPLER_SINC_CUTOFF) :
+			(int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF);
+        int window_step = RESAMPLER_RESOLUTION;
+        /* one output sample per pass */
+        do
+        {
+            // kernel_sum and the kernel taps are single-precision floats in this path
+            float kernel_sum = 0.0;
+            __m128 kernel[SINC_WIDTH / 2];
+            __m128 temp1, temp2;
+            __m128 samplex = _mm_setzero_ps();
+            float *kernelf = (float*)(&kernel); /* scalar view of the same SINC_WIDTH * 2 floats */
+            int i = SINC_WIDTH;
+            int phase_reduced = PHASE_REDUCE(phase);
+            int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+            /* stop when the output window is full */
+            if ( out >= out_end )
+                break;
+            /* fill the kernel through its float view, last slot first, summing the taps for normalisation */
+            for (; i >= -SINC_WIDTH + 1; --i)
+            {
+                int pos = i * step;
+                int window_pos = i * window_step;
+                kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+            }
+            for (i = 0; i < SINC_WIDTH / 2; ++i)
+            {
+                temp1 = _mm_loadu_ps( (const float *)( in + i * 4 ) );
+                temp2 = _mm_load_ps( (const float *)( kernel + i ) );
+                temp1 = _mm_mul_ps( temp1, temp2 );
+                samplex = _mm_add_ps( samplex, temp1 );
+            }
+            kernel_sum = 1.0f / kernel_sum; /* reciprocal, so normalising is a multiply */
+            temp1 = _mm_movehl_ps( temp1, samplex ); /* upper two lanes moved down */
+            samplex = _mm_add_ps( samplex, temp1 );
+            temp1 = samplex;
+            temp1 = _mm_shuffle_ps( temp1, samplex, _MM_SHUFFLE(0, 0, 0, 1) ); /* lane 1 moved to lane 0 */
+            samplex = _mm_add_ps( samplex, temp1 );
+            temp1 = _mm_set_ss( kernel_sum );
+            samplex = _mm_mul_ps( samplex, temp1 );
+            _mm_store_ss( out, samplex );
+            ++out;
+            /* advance the phase; ADD_HI/CLEAR_HI presumably move in by its high part and keep the rest */
+            phase.quad += phase_inc.quad;
+            
+            ADD_HI(in, phase);
+            
+            CLEAR_HI(phase);
+        }
+        while ( in < in_end );
+        /* write the loop state back so the next call resumes from here */
+        r->phase = phase;
+        *out_ = out;
+        
+        used = (int)(in - in_);
+        
+        r->write_filled -= used;
+    }
+    
+    return used;
+}
+#endif
+
+#ifdef RESAMPLER_NEON
+static int resampler_run_sinc(resampler * r, float ** out_, float * out_end)
+{   /* Sinc (NEON): each output is SINC_WIDTH * 2 inputs weighted by a float windowed-sinc kernel rebuilt for the current phase. */
+    int in_size = r->write_filled;
+    float const* in_ = r->buffer_in + resampler_buffer_size + r->write_pos - r->write_filled;
+    int used = 0; /* input samples consumed; this is also the return value */
+    in_size -= SINC_WIDTH * 2; /* a full kernel's worth of inputs is read per output */
+    if ( in_size > 0 )
+    {
+        float* out = *out_;
+        float const* in = in_;
+        float const* const in_end = in + in_size;
+        bigint phase = r->phase;
+        bigint phase_inc = r->phase_inc;
+        /* sinc_lut stride: when phase_inc exceeds 2^32 it is divided by phase_inc / 2^32 */
+        int step = phase_inc.quad > 0x100000000ll ?
+			(int)(RESAMPLER_RESOLUTION / (phase_inc.quad * (1.f / 0x100000000ll)) * RESAMPLER_SINC_CUTOFF) :
+			(int)(RESAMPLER_RESOLUTION * RESAMPLER_SINC_CUTOFF);
+        int window_step = RESAMPLER_RESOLUTION;
+        /* one output sample per pass */
+        do
+        {
+            // kernel_sum and the kernel taps are single-precision floats in this path
+            float kernel_sum = 0.0;
+            float32x4_t kernel[SINC_WIDTH / 2];
+            float32x4_t temp1, temp2;
+            float32x4_t samplex = {0};
+            float32x2_t half;
+            float *kernelf = (float*)(&kernel); /* scalar view of the same SINC_WIDTH * 2 floats */
+            int i = SINC_WIDTH;
+            int phase_reduced = PHASE_REDUCE(phase);
+            int phase_adj = phase_reduced * step / RESAMPLER_RESOLUTION;
+            /* stop when the output window is full */
+            if ( out >= out_end )
+                break;
+            /* fill the kernel through its float view, last slot first, summing the taps for normalisation */
+            for (; i >= -SINC_WIDTH + 1; --i)
+            {
+                int pos = i * step;
+                int window_pos = i * window_step;
+                kernel_sum += kernelf[i + SINC_WIDTH - 1] = sinc_lut[abs(phase_adj - pos)] * window_lut[abs(phase_reduced - window_pos)];
+            }
+            for (i = 0; i < SINC_WIDTH / 2; ++i)
+            {
+                temp1 = vld1q_f32( (const float32_t *)( in + i * 4 ) );
+                temp2 = vld1q_f32( (const float32_t *)( kernel + i ) );
+                samplex = vmlaq_f32( samplex, temp1, temp2 );
+            }
+            kernel_sum = 1.0 / kernel_sum; /* reciprocal, so normalising is a multiply */
+            samplex = vmulq_f32(samplex, vmovq_n_f32(kernel_sum));
+            half = vadd_f32(vget_high_f32(samplex), vget_low_f32(samplex)); /* add the two halves, then the remaining pair */
+            *out++ = vget_lane_f32(vpadd_f32(half, half), 0);
+            /* advance the phase; ADD_HI/CLEAR_HI presumably move in by its high part and keep the rest */
+            phase.quad += phase_inc.quad;
+            
+            ADD_HI(in, phase);
+            
+            CLEAR_HI(phase);
+        }
+        while ( in < in_end );
+        /* write the loop state back so the next call resumes from here */
+        r->phase = phase;
+        *out_ = out;
+        
+        used = (int)(in - in_);
+        
+        r->write_filled -= used;
+    }
+    
+    return used;
+}
+#endif
+
+static void resampler_fill(resampler * r)
+{   /* Runs the selected quality's kernel repeatedly, turning buffered input into output until input runs low or the output ring is full. */
+    int min_filled = resampler_min_filled(r);
+    int quality = r->quality;
+    while ( r->write_filled > min_filled &&
+            r->read_filled < resampler_buffer_size )
+    {
+        int write_pos = ( r->read_pos + r->read_filled ) % resampler_buffer_size; /* first free output slot */
+        int write_size = resampler_buffer_size - write_pos; /* contiguous room up to the end of the ring */
+        float * out = r->buffer_out + write_pos;
+        if ( write_size > ( resampler_buffer_size - r->read_filled ) )
+            write_size = resampler_buffer_size - r->read_filled; /* never run into unread output */
+        switch (quality)
+        {
+        case RESAMPLER_QUALITY_ZOH:
+            resampler_run_zoh( r, &out, out + write_size );
+            break;
+        /* BLEP adds whole kernels: up to SINC_WIDTH * 2 - 1 slots from the ring start are staged past its end */
+        case RESAMPLER_QUALITY_BLEP:
+        {
+            int used;
+            int write_extra = 0;
+            if ( write_pos >= r->read_pos )
+                write_extra = r->read_pos; /* free slots available at the start of the ring */
+            if ( write_extra > SINC_WIDTH * 2 - 1 )
+                write_extra = SINC_WIDTH * 2 - 1;
+            memcpy( r->buffer_out + resampler_buffer_size, r->buffer_out, write_extra * sizeof(r->buffer_out[0]) );
+#ifdef RESAMPLER_SSE
+            if ( resampler_has_sse )
+                used = resampler_run_blep_sse( r, &out, out + write_size + write_extra );
+            else
+#endif
+                used = resampler_run_blep( r, &out, out + write_size + write_extra );
+            memcpy( r->buffer_out, r->buffer_out + resampler_buffer_size, write_extra * sizeof(r->buffer_out[0]) ); /* copy the staged slots back to the ring start */
+            if (!used)
+                return; /* no input was consumed, so stop here */
+            break;
+        }
+        /* linear needs no staging: it writes one sample per output slot */
+        case RESAMPLER_QUALITY_LINEAR:
+            resampler_run_linear( r, &out, out + write_size );
+            break;
+        /* BLAM uses the same staging as BLEP but detects a stall by an unchanged output pointer */
+        case RESAMPLER_QUALITY_BLAM:
+        {
+            float * out_ = out;
+            int write_extra = 0;
+            if ( write_pos >= r->read_pos )
+                write_extra = r->read_pos; /* free slots available at the start of the ring */
+            if ( write_extra > SINC_WIDTH * 2 - 1 )
+                write_extra = SINC_WIDTH * 2 - 1;
+            memcpy( r->buffer_out + resampler_buffer_size, r->buffer_out, write_extra * sizeof(r->buffer_out[0]) );
+#ifdef RESAMPLER_SSE
+            if ( resampler_has_sse )
+                resampler_run_blam_sse( r, &out, out + write_size + write_extra );
+            else
+#endif
+                resampler_run_blam( r, &out, out + write_size + write_extra );
+            memcpy( r->buffer_out, r->buffer_out + resampler_buffer_size, write_extra * sizeof(r->buffer_out[0]) ); /* copy the staged slots back to the ring start */
+            if ( out == out_ )
+                return; /* no output was produced, so stop here */
+            break;
+        }
+        /* cubic and sinc pick the SSE variant when resampler_has_sse is set */
+        case RESAMPLER_QUALITY_CUBIC:
+#ifdef RESAMPLER_SSE
+            if ( resampler_has_sse )
+                resampler_run_cubic_sse( r, &out, out + write_size );
+            else
+#endif
+                resampler_run_cubic( r, &out, out + write_size );
+            break;
+                
+        case RESAMPLER_QUALITY_SINC:
+#ifdef RESAMPLER_SSE
+            if ( resampler_has_sse )
+                resampler_run_sinc_sse( r, &out, out + write_size );
+            else
+#endif
+                resampler_run_sinc( r, &out, out + write_size );
+            break;
+        }
+        r->read_filled += (int)(out - r->buffer_out - write_pos); /* count the slots the kernel advanced over */
+    }
+}
+
+static void resampler_fill_and_remove_delay(resampler * r)
+{   /* Refill the output, then drop the output-delay samples while delay_removed is still negative. */
+    int pending;
+    resampler_fill( r );
+    if ( r->delay_removed >= 0 )
+        return;   /* not negative: nothing left to drop */
+    pending = resampler_output_delay( r );
+    r->delay_removed = 0;
+    for ( ; pending != 0; --pending )
+        resampler_remove_sample( r, 1 );
+}
+
+int resampler_get_sample_count(void *_r)
+{   /* Count of buffered output samples; an empty buffer is refilled first when allowed. */
+    resampler * const rs = ( resampler * ) _r;
+    if ( rs->read_filled < 1 && ( rs->inv_phase_inc.quad || !( rs->quality == RESAMPLER_QUALITY_BLEP || rs->quality == RESAMPLER_QUALITY_BLAM ) ) )
+        resampler_fill_and_remove_delay( rs );   /* BLEP/BLAM refill only with a non-zero inv_phase_inc */
+    return rs->read_filled;
+}
+
+int resampler_get_sample(void *_r)
+{   /* Peek at the next output sample as an int; 0 when nothing is available. */
+    resampler * const rs = ( resampler * ) _r;
+    if ( rs->read_filled < 1 && rs->phase_inc.quad )
+        resampler_fill_and_remove_delay( rs );   /* try to produce more output */
+    if ( rs->read_filled < 1 )
+        return 0;
+    /* BLEP/BLAM slots hold deltas, so the running accumulator is added back. */
+    if ( rs->quality != RESAMPLER_QUALITY_BLEP && rs->quality != RESAMPLER_QUALITY_BLAM )
+        return (int)rs->buffer_out[ rs->read_pos ];
+    return (int)(rs->buffer_out[ rs->read_pos ] + rs->accumulator);
+}
+
+float resampler_get_sample_float(void *_r)
+{   /* Peek at the next output sample as a float; 0 when nothing is available. */
+    resampler * const rs = ( resampler * ) _r;
+    if ( rs->read_filled < 1 && rs->phase_inc.quad )
+        resampler_fill_and_remove_delay( rs );   /* try to produce more output */
+    if ( rs->read_filled < 1 )
+        return 0;
+    /* BLEP/BLAM slots hold deltas, so the running accumulator is added back. */
+    if ( rs->quality != RESAMPLER_QUALITY_BLEP && rs->quality != RESAMPLER_QUALITY_BLAM )
+        return rs->buffer_out[ rs->read_pos ];
+    return (float)(rs->buffer_out[ rs->read_pos ] + rs->accumulator);
+}
+
+void resampler_remove_sample(void *_r, int decay)
+{   /* Drop the oldest output sample; for BLEP/BLAM also integrate it, optionally with decay. */
+    resampler * const rs = ( resampler * ) _r;
+
+    if ( rs->read_filled <= 0 )
+        return;   /* nothing buffered */
+
+    if ( rs->quality == RESAMPLER_QUALITY_BLEP || rs->quality == RESAMPLER_QUALITY_BLAM )
+    {
+        /* fold the slot into the accumulator, then clear it for later kernels */
+        rs->accumulator += rs->buffer_out[ rs->read_pos ];
+        rs->buffer_out[ rs->read_pos ] = 0;
+        if ( decay )
+            rs->accumulator -= rs->accumulator * (1.0f / 8192.0f);
+        if ( decay && fabs(rs->accumulator) < 1e-20f )
+            rs->accumulator = 0;   /* flush vanishing values to exactly zero */
+    }
+    rs->read_pos = ( rs->read_pos + 1 ) % resampler_buffer_size;
+    --rs->read_filled;
+}
diff --git a/libraries/dumb/src/helpers/riff.c b/libraries/dumb/src/helpers/riff.c
new file mode 100644
index 000000000..6589d12ff
--- /dev/null
+++ b/libraries/dumb/src/helpers/riff.c
@@ -0,0 +1,87 @@
+#include "dumb.h"
+#include "internal/riff.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+struct riff * riff_parse( DUMBFILE * f, int32 offset, int32 size, unsigned proper )
+{
+	unsigned stream_size;
+	struct riff * stream;
+	/* Parses the RIFF container found at `offset` (at most `size` bytes) into a chunk tree. */
+	/* Returns NULL on failure; a non-NULL result must be released with riff_free(). */
+    if ( size < 8 ) return 0;
+    if ( dumbfile_seek(f, offset, DFS_SEEK_SET) ) return 0;
+    if ( dumbfile_mgetl(f) != DUMB_ID('R','I','F','F') ) return 0;
+
+    stream_size = dumbfile_igetl(f);
+	/* size >= 8 here, so `size - 8` cannot wrap, whereas `stream_size + 8` could. */
+    if ( stream_size > (unsigned)size - 8 ) return 0;
+	if ( stream_size < 4 ) return 0;
+    stream = (struct riff *) malloc( sizeof( struct riff ) );
+	if ( ! stream ) return 0;
+    stream->type = dumbfile_mgetl(f);
+	stream->chunk_count = 0;
+	stream->chunks = 0;
+	stream_size -= 4;
+    while ( stream_size && !dumbfile_error(f) )
+	{
+		struct riff_chunk * chunk;
+		struct riff_chunk * grown;
+		if ( stream_size < 8 ) break;
+		/* Grow via a temporary: on failure the old array stays owned by `stream` and is freed below. */
+		grown = ( struct riff_chunk * ) realloc( stream->chunks, ( stream->chunk_count + 1 ) * sizeof( struct riff_chunk ) );
+		if ( ! grown ) break;
+		stream->chunks = grown;
+		chunk = stream->chunks + stream->chunk_count;
+        chunk->type = dumbfile_mgetl(f);
+        chunk->size = dumbfile_igetl(f);
+        chunk->offset = dumbfile_pos(f);
+		stream_size -= 8;
+		if ( stream_size < chunk->size ) break;
+        if ( chunk->type == DUMB_ID('R','I','F','F') )
+		{
+            chunk->nested = riff_parse( f, chunk->offset - 8, chunk->size + 8, proper );
+            if ( ! chunk->nested ) break;
+		}
+		else
+		{
+            chunk->nested = 0;
+		}
+        dumbfile_seek(f, chunk->offset + chunk->size, DFS_SEEK_SET);
+		stream_size -= chunk->size;
+		/* `proper`: odd-sized chunks carry a pad byte; consume it only while bytes remain so the counter cannot wrap. */
+		if ( proper && ( chunk->size & 1 ) && stream_size )
+		{
+            dumbfile_skip(f, 1);
+			-- stream_size;
+		}
+		++stream->chunk_count;
+	}
+	/* Leftover bytes mean the loop stopped early (I/O error, inconsistent sizes or allocation failure). */
+	if ( stream_size )
+	{
+		riff_free( stream );
+		stream = 0;
+	}
+
+	return stream;
+}
+
+void riff_free( struct riff * stream )
+{	/* Releases a tree built by riff_parse(), nested RIFFs first; a NULL stream is ignored. */
+	unsigned idx;
+
+	if ( ! stream ) return;
+
+	if ( stream->chunks )
+	{
+		for ( idx = 0; idx < stream->chunk_count; ++idx )
+		{
+			/* The recursive call returns at once for chunks without a nested RIFF (NULL). */
+			riff_free( stream->chunks[ idx ].nested );
+		}
+		free( stream->chunks );
+	}
+	free( stream );
+}
diff --git a/libraries/dumb/src/helpers/sampbuf.c b/libraries/dumb/src/helpers/sampbuf.c
new file mode 100644
index 000000000..ea30d506f
--- /dev/null
+++ b/libraries/dumb/src/helpers/sampbuf.c
@@ -0,0 +1,64 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * sampbuf.c - Helper for allocating sample           / / \  \
+ *             buffers.                              | <  /   \_
+ *                                                   |  \/ /\   /
+ * By entheh.                                         \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include "dumb.h"
+
+/* DEPRECATED */
+/* Builds one pointer per channel into a single block of n_channels * length samples. */
+/* Returns NULL on bad arguments, size overflow or allocation failure; free with destroy_sample_buffer(). */
+sample_t **create_sample_buffer(int n_channels, int32 length)
+{
+	int i;
+	sample_t **samples;
+	if (n_channels <= 0 || length < 0) return NULL; /* also keeps the size_t casts below safe */
+	if (length > 0 && (size_t)n_channels > (size_t)-1 / sizeof(**samples) / (size_t)length) return NULL;
+	samples = malloc((size_t)n_channels * sizeof(*samples));
+	if (!samples) return NULL;
+	samples[0] = malloc((size_t)n_channels * (size_t)length * sizeof(*samples[0]));
+	if (!samples[0]) { free(samples); return NULL; }
+	for (i = 1; i < n_channels; i++) samples[i] = samples[i-1] + length;
+	return samples;
+}
+
+/* Builds one pointer per channel pair into a single block of n_channels * length samples; */
+/* consecutive pointers are length * 2 samples apart. NULL on bad arguments, size overflow or allocation failure. */
+sample_t **DUMBEXPORT allocate_sample_buffer(int n_channels, int32 length)
+{
+	int i;
+	sample_t **samples;
+	if (n_channels <= 0 || length < 0) return NULL; /* also keeps the size_t casts below safe */
+	if (length > 0 && (size_t)n_channels > (size_t)-1 / sizeof(**samples) / (size_t)length) return NULL;
+	samples = malloc((size_t)((n_channels + 1) >> 1) * sizeof(*samples));
+	if (!samples) return NULL;
+	samples[0] = malloc((size_t)n_channels * (size_t)length * sizeof(*samples[0]));
+	if (!samples[0]) { free(samples); return NULL; }
+	for (i = 1; i < (n_channels + 1) >> 1; i++) samples[i] = samples[i-1] + length*2;
+	return samples;
+}
+
+/* Frees a buffer obtained from create_sample_buffer() or allocate_sample_buffer(); NULL is ignored. */
+/* The sample block hangs off samples[0], so it is released before the pointer array. */
+void DUMBEXPORT destroy_sample_buffer(sample_t **samples)
+{
+	if (!samples)
+		return;
+	free(samples[0]);
+	free(samples);
+}
diff --git a/libraries/dumb/src/helpers/silence.c b/libraries/dumb/src/helpers/silence.c
new file mode 100644
index 000000000..428f6577f
--- /dev/null
+++ b/libraries/dumb/src/helpers/silence.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * silence.c - Silencing helper.                      / / \  \
+ *                                                   | <  /   \_
+ * By entheh.                                        |  \/ /\   /
+ *                                                    \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <string.h>
+#include "dumb.h"
+
+/* Zeroes the first `length` samples of `samples` with memset(). */
+/* NOTE(review): `length` is not range-checked here - presumably callers pass a non-negative count. */
+void DUMBEXPORT dumb_silence(sample_t *samples, int32 length)
+{
+	memset(samples, 0, sizeof(samples[0]) * length);
+}
+
diff --git a/libraries/dumb/src/helpers/stdfile.c b/libraries/dumb/src/helpers/stdfile.c
new file mode 100644
index 000000000..f46022791
--- /dev/null
+++ b/libraries/dumb/src/helpers/stdfile.c
@@ -0,0 +1,146 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * stdfile.c - stdio file input module.               / / \  \
+ *                                                   | <  /   \_
+ * By entheh.                                        |  \/ /\   /
+ *                                                    \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdio.h>
+
+#include "dumb.h"
+
+
+
+typedef struct dumb_stdfile
+{
+    FILE * file; /* underlying stdio stream */
+    long size; /* ftell() result taken at end of file when the wrapper was created */
+} dumb_stdfile;
+
+/* open callback: opens `filename` for binary reading and caches its size; NULL on malloc() or fopen() failure. */
+static void *DUMBCALLBACK dumb_stdfile_open(const char *filename)
+{
+    dumb_stdfile * file = ( dumb_stdfile * ) malloc( sizeof(dumb_stdfile) );
+    if ( !file ) return 0;
+    file->file = fopen(filename, "rb");
+    if ( !file->file ) { free( file ); return 0; } /* never hand a NULL stream to fseek()/ftell(); do not leak the wrapper */
+    fseek(file->file, 0, SEEK_END);
+    file->size = ftell(file->file);
+    fseek(file->file, 0, SEEK_SET);
+    return file;
+}
+
+/* skip callback: moves the read position n bytes relative to the current one. */
+/* Returns fseek()'s result (0 on success, non-zero on failure). */
+static int DUMBCALLBACK dumb_stdfile_skip(void *f, long n)
+{
+    FILE * stream = ( ( dumb_stdfile * ) f )->file;
+    return fseek(stream, n, SEEK_CUR);
+}
+
+/* getc callback: reads one byte from the wrapped stream. */
+/* Returns fgetc()'s result, i.e. the byte value or EOF. */
+static int DUMBCALLBACK dumb_stdfile_getc(void *f)
+{
+    FILE * stream = ( ( dumb_stdfile * ) f )->file;
+    return fgetc(stream);
+}
+
+/* getnc callback: reads up to n bytes from the wrapped stream into ptr. */
+/* Returns the number of bytes fread() delivered, narrowed to int32. */
+static int32 DUMBCALLBACK dumb_stdfile_getnc(char *ptr, int32 n, void *f)
+{
+    FILE * stream = ( ( dumb_stdfile * ) f )->file;
+    return (int32)fread(ptr, 1, n, stream);
+}
+
+/* close callback for files opened by dumb_stdfile_open(): */
+/* closes the stdio stream, then frees the wrapper itself. */
+static void DUMBCALLBACK dumb_stdfile_close(void *f)
+{
+    dumb_stdfile * wrapper = ( dumb_stdfile * ) f;
+    fclose(wrapper->file);
+    free(wrapper);
+}
+
+/* close callback used by stdfile_dfs_leave_open: */
+/* frees only the wrapper; the FILE itself is not closed here. */
+static void DUMBCALLBACK dumb_stdfile_noclose(void *f)
+{
+    free(f);
+}
+
+/* seek callback: positions the stream n bytes from the start of the file. */
+/* Returns fseek()'s result (0 on success, non-zero on failure). */
+static int DUMBCALLBACK dumb_stdfile_seek(void *f, long n)
+{
+    FILE * stream = ( ( dumb_stdfile * ) f )->file;
+    return fseek(stream, n, SEEK_SET);
+}
+
+/* get_size callback: returns the size cached when the wrapper was created. */
+/* No I/O happens here, so later changes to the file are not reflected. */
+static long DUMBCALLBACK dumb_stdfile_get_size(void *f)
+{
+    const dumb_stdfile * wrapper = ( const dumb_stdfile * ) f;
+    return wrapper->size;
+}
+
+/* Callback table for files that this module opens itself: */
+/* its close entry, dumb_stdfile_close(), both fclose()s the stream and frees the wrapper. */
+static const DUMBFILE_SYSTEM stdfile_dfs = {
+	&dumb_stdfile_open,
+	&dumb_stdfile_skip,
+	&dumb_stdfile_getc,
+	&dumb_stdfile_getnc,
+    &dumb_stdfile_close,
+    &dumb_stdfile_seek,
+    &dumb_stdfile_get_size
+};
+
+/* Installs stdfile_dfs through register_dumbfile_system(). */
+/* NOTE(review): presumably this makes dumbfile_open() go through stdio - confirm against dumbfile.c. */
+void DUMBEXPORT dumb_register_stdfiles(void)
+{
+	register_dumbfile_system(&stdfile_dfs);
+}
+
+/* Callback table for FILE pointers supplied by the caller (see dumbfile_open_stdfile()): */
+/* there is no open callback, and closing only frees the wrapper via dumb_stdfile_noclose(). */
+static const DUMBFILE_SYSTEM stdfile_dfs_leave_open = {
+	NULL,
+	&dumb_stdfile_skip,
+	&dumb_stdfile_getc,
+	&dumb_stdfile_getnc,
+    &dumb_stdfile_noclose,
+    &dumb_stdfile_seek,
+    &dumb_stdfile_get_size
+};
+
+/* Wraps an already-open FILE in a DUMBFILE whose close callback leaves the FILE open. */
+/* Measuring the size leaves the stream positioned at offset 0. Returns NULL if malloc() fails. */
+DUMBFILE *DUMBEXPORT dumbfile_open_stdfile(FILE *p)
+{
+    dumb_stdfile * wrapper = ( dumb_stdfile * ) malloc( sizeof(dumb_stdfile) );
+    if ( !wrapper ) return 0;
+
+    wrapper->file = p;
+    /* Size probe: seek to the end, record the offset, seek back to offset 0. */
+    fseek(p, 0, SEEK_END);
+    wrapper->size = ftell(p);
+    fseek(p, 0, SEEK_SET);
+
+    return dumbfile_open_ex(wrapper, &stdfile_dfs_leave_open);
+}
diff --git a/libraries/dumb/src/it/itload.c b/libraries/dumb/src/it/itload.c
new file mode 100644
index 000000000..01f7cf019
--- /dev/null
+++ b/libraries/dumb/src/it/itload.c
@@ -0,0 +1,43 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * itload.c - Code to read an Impulse Tracker         / / \  \
+ *            file, opening and closing it for       | <  /   \_
+ *            you.                                   |  \/ /\   /
+ *                                                    \_  /  > /
+ * By entheh. Don't worry Bob, you're credited          | \ / /
+ * in itread.c!                                         |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_it_quick(): loads an IT file into a DUH struct, returning a
+ * pointer to the DUH struct. When you have finished with it, you must pass
+ * the pointer to unload_duh() so that the memory can be freed.
+ */
+DUH *DUMBEXPORT dumb_load_it_quick(const char *filename)
+{
+	DUH *result = NULL;
+	DUMBFILE *input = dumbfile_open(filename);
+
+	/* Without an open file there is nothing to parse and NULL is returned. */
+	if (input) {
+		/* Parse first, then close: the DUMBFILE is only needed while reading. */
+		result = dumb_read_it_quick(input);
+		dumbfile_close(input);
+	}
+
+	return result;
+}
+
diff --git a/libraries/dumb/src/it/itload2.c b/libraries/dumb/src/it/itload2.c
new file mode 100644
index 000000000..68b38cd77
--- /dev/null
+++ b/libraries/dumb/src/it/itload2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * itload2.c - Function to read an Impulse Tracker    / / \  \
+ *             file, opening and closing it for      | <  /   \_
+ *             you, and do an initial run-through.   |  \/ /\   /
+ *                                                    \_  /  > /
+ * Split off from itload.c by entheh.                   | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+/* Loads an IT file with dumb_load_it_quick() and then runs dumb_it_do_initial_runthrough() on it. */
+/* The run-through helper is called even when loading failed (NULL); the NULL is then returned. */
+DUH *DUMBEXPORT dumb_load_it(const char *filename)
+{
+	DUH *loaded = dumb_load_it_quick(filename);
+	dumb_it_do_initial_runthrough(loaded);
+	return loaded;
+}
diff --git a/libraries/dumb/src/it/itmisc.c b/libraries/dumb/src/it/itmisc.c
new file mode 100644
index 000000000..389c74736
--- /dev/null
+++ b/libraries/dumb/src/it/itmisc.c
@@ -0,0 +1,249 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * itmisc.c - Miscellaneous functions relating        / / \  \
+ *            to module files.                       | <  /   \_
+ *                                                   |  \/ /\   /
+ * By entheh.                                         \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+int dumb_it_default_panning_separation = 25; /* global default, not read anywhere in this file; NOTE(review): unit/range - confirm at the point of use */
+
+/* Fetches the IT sigdata of `duh` by calling duh_get_raw_sigdata() with index -1 and type SIGTYPE_IT. */
+DUMB_IT_SIGDATA *DUMBEXPORT duh_get_it_sigdata(DUH *duh)
+{
+	return duh_get_raw_sigdata(duh, -1, SIGTYPE_IT);
+}
+
+/* Returns sd->song_message, or NULL when sd itself is NULL. */
+const unsigned char *DUMBEXPORT dumb_it_sd_get_song_message(DUMB_IT_SIGDATA *sd)
+{
+	if (!sd) return NULL;
+	return sd->song_message;
+}
+
+/* Returns sd->n_orders, or 0 when sd is NULL. */
+int DUMBEXPORT dumb_it_sd_get_n_orders(DUMB_IT_SIGDATA *sd)
+{
+	if (!sd) return 0;
+	return sd->n_orders;
+}
+
+/* Returns sd->n_samples, or 0 when sd is NULL. */
+int DUMBEXPORT dumb_it_sd_get_n_samples(DUMB_IT_SIGDATA *sd)
+{
+	if (!sd) return 0;
+	return sd->n_samples;
+}
+
+/* Returns sd->n_instruments, or 0 when sd is NULL. */
+int DUMBEXPORT dumb_it_sd_get_n_instruments(DUMB_IT_SIGDATA *sd)
+{
+	if (!sd) return 0;
+	return sd->n_instruments;
+}
+
+/* Returns the name field of sample i. */
+/* sd, sd->sample and the index range are checked only through ASSERT(); there is no NULL fallback. */
+const unsigned char *DUMBEXPORT dumb_it_sd_get_sample_name(DUMB_IT_SIGDATA *sd, int i)
+{
+	ASSERT(sd && sd->sample && i >= 0 && i < sd->n_samples);
+	return sd->sample[i].name;
+}
+
+/* Returns the filename field of sample i. */
+/* sd, sd->sample and the index range are checked only through ASSERT(); there is no NULL fallback. */
+const unsigned char *DUMBEXPORT dumb_it_sd_get_sample_filename(DUMB_IT_SIGDATA *sd, int i)
+{
+	ASSERT(sd && sd->sample && i >= 0 && i < sd->n_samples);
+	return sd->sample[i].filename;
+}
+
+/* Returns the name field of instrument i. */
+/* sd, sd->instrument and the index range are checked only through ASSERT(); there is no NULL fallback. */
+const unsigned char *DUMBEXPORT dumb_it_sd_get_instrument_name(DUMB_IT_SIGDATA *sd, int i)
+{
+	ASSERT(sd && sd->instrument && i >= 0 && i < sd->n_instruments);
+	return sd->instrument[i].name;
+}
+
+/* Returns the filename field of instrument i. */
+/* sd, sd->instrument and the index range are checked only through ASSERT(); there is no NULL fallback. */
+const unsigned char *DUMBEXPORT dumb_it_sd_get_instrument_filename(DUMB_IT_SIGDATA *sd, int i)
+{
+	ASSERT(sd && sd->instrument && i >= 0 && i < sd->n_instruments);
+	return sd->instrument[i].filename;
+}
+
+/* Returns sd->global_volume, or 0 when sd is NULL. */
+int DUMBEXPORT dumb_it_sd_get_initial_global_volume(DUMB_IT_SIGDATA *sd)
+{
+	if (!sd) return 0;
+	return sd->global_volume;
+}
+
+/* Stores gv in sd->global_volume; a NULL sd is ignored. */
+void DUMBEXPORT dumb_it_sd_set_initial_global_volume(DUMB_IT_SIGDATA *sd, int gv)
+{
+	if (!sd) return;
+	sd->global_volume = gv;
+}
+
+/* Returns sd->mixing_volume, or 0 when sd is NULL. */
+int DUMBEXPORT dumb_it_sd_get_mixing_volume(DUMB_IT_SIGDATA *sd)
+{
+	if (!sd) return 0;
+	return sd->mixing_volume;
+}
+
+/* Stores mv in sd->mixing_volume; a NULL sd is ignored. */
+void DUMBEXPORT dumb_it_sd_set_mixing_volume(DUMB_IT_SIGDATA *sd, int mv)
+{
+	if (!sd) return;
+	sd->mixing_volume = mv;
+}
+
+/* Returns sd->speed, or 0 when sd is NULL. */
+int DUMBEXPORT dumb_it_sd_get_initial_speed(DUMB_IT_SIGDATA *sd)
+{
+	if (!sd) return 0;
+	return sd->speed;
+}
+
+/* Stores speed in sd->speed; a NULL sd is ignored. */
+void DUMBEXPORT dumb_it_sd_set_initial_speed(DUMB_IT_SIGDATA *sd, int speed)
+{
+	if (!sd) return;
+	sd->speed = speed;
+}
+
+/* Returns sd->tempo, or 0 when sd is NULL. */
+int DUMBEXPORT dumb_it_sd_get_initial_tempo(DUMB_IT_SIGDATA *sd)
+{
+	if (!sd) return 0;
+	return sd->tempo;
+}
+
+/* Stores tempo in sd->tempo; a NULL sd is ignored. */
+void DUMBEXPORT dumb_it_sd_set_initial_tempo(DUMB_IT_SIGDATA *sd, int tempo)
+{
+	if (!sd) return;
+	sd->tempo = tempo;
+}
+
+/* Returns sd->channel_volume[channel], or 0 when sd is NULL; the channel range is checked with ASSERT(). */
+int DUMBEXPORT dumb_it_sd_get_initial_channel_volume(DUMB_IT_SIGDATA *sd, int channel)
+{
+	ASSERT(channel >= 0 && channel < DUMB_IT_N_CHANNELS);
+	if (!sd) return 0;
+	return sd->channel_volume[channel];
+}
+
+void DUMBEXPORT dumb_it_sd_set_initial_channel_volume(DUMB_IT_SIGDATA *sd, int channel, int volume)
+{ /* Stores volume in sd->channel_volume[channel]; a NULL sd is ignored. */
+	ASSERT(channel >= 0 && channel < DUMB_IT_N_CHANNELS); /* range check only through ASSERT() */
+	if (sd) sd->channel_volume[channel] = volume;
+}
+
+/* Returns sr->order, or -1 when sr is NULL. */
+int DUMBEXPORT dumb_it_sr_get_current_order(DUMB_IT_SIGRENDERER *sr)
+{
+	if (!sr) return -1;
+	return sr->order;
+}
+
+/* Returns sr->row, or -1 when sr is NULL. */
+int DUMBEXPORT dumb_it_sr_get_current_row(DUMB_IT_SIGRENDERER *sr)
+{
+	if (!sr) return -1;
+	return sr->row;
+}
+
+/* Returns sr->globalvolume, or 0 when sr is NULL. */
+int DUMBEXPORT dumb_it_sr_get_global_volume(DUMB_IT_SIGRENDERER *sr)
+{
+	if (!sr) return 0;
+	return sr->globalvolume;
+}
+
+/* Stores gv in sr->globalvolume; a NULL sr is ignored. */
+void DUMBEXPORT dumb_it_sr_set_global_volume(DUMB_IT_SIGRENDERER *sr, int gv)
+{
+	if (!sr) return;
+	sr->globalvolume = gv;
+}
+
+/* Returns sr->tempo, or 0 when sr is NULL. */
+int DUMBEXPORT dumb_it_sr_get_tempo(DUMB_IT_SIGRENDERER *sr)
+{
+	if (!sr) return 0;
+	return sr->tempo;
+}
+
+/* Stores tempo in sr->tempo; a NULL sr is ignored. */
+void DUMBEXPORT dumb_it_sr_set_tempo(DUMB_IT_SIGRENDERER *sr, int tempo)
+{
+	if (!sr) return;
+	sr->tempo = tempo;
+}
+
+/* Returns sr->speed, or 0 when sr is NULL. */
+int DUMBEXPORT dumb_it_sr_get_speed(DUMB_IT_SIGRENDERER *sr)
+{
+	if (!sr) return 0;
+	return sr->speed;
+}
+
+/* Stores speed in sr->speed; a NULL sr is ignored. */
+void DUMBEXPORT dumb_it_sr_set_speed(DUMB_IT_SIGRENDERER *sr, int speed)
+{
+	if (!sr) return;
+	sr->speed = speed;
+}
+
+/* Returns sr->channel[channel].channelvolume, or 0 when sr is NULL. NOTE(review): channel is not range-checked here. */
+int DUMBEXPORT dumb_it_sr_get_channel_volume(DUMB_IT_SIGRENDERER *sr, int channel)
+{
+	if (!sr) return 0;
+	return sr->channel[channel].channelvolume;
+}
+
+/* Stores volume in sr->channel[channel].channelvolume; a NULL sr is ignored. NOTE(review): channel is not range-checked here. */
+void DUMBEXPORT dumb_it_sr_set_channel_volume(DUMB_IT_SIGRENDERER *sr, int channel, int volume)
+{
+	if (!sr) return;
+	sr->channel[channel].channelvolume = volume;
+}
+
+/* Sets (muted != 0) or clears (muted == 0) IT_CHANNEL_MUTED in the flags of sr->channel[channel]. */
+/* A NULL sr is ignored. NOTE(review): channel is not range-checked here. */
+void DUMBEXPORT dumb_it_sr_set_channel_muted(DUMB_IT_SIGRENDERER *sr, int channel, int muted)
+{
+	if (!sr) return;
+	if (!muted) {
+		sr->channel[channel].flags &= ~IT_CHANNEL_MUTED;
+		return;
+	}
+	sr->channel[channel].flags |= IT_CHANNEL_MUTED;
+}
+
+/* Returns 1 if IT_CHANNEL_MUTED is set on sr->channel[channel], otherwise 0 (also 0 when sr is NULL). */
+int DUMBEXPORT dumb_it_sr_get_channel_muted(DUMB_IT_SIGRENDERER *sr, int channel)
+{
+	if (!sr) return 0;
+	return (sr->channel[channel].flags & IT_CHANNEL_MUTED) != 0;
+}
diff --git a/libraries/dumb/src/it/itorder.c b/libraries/dumb/src/it/itorder.c
new file mode 100644
index 000000000..6959f0544
--- /dev/null
+++ b/libraries/dumb/src/it/itorder.c
@@ -0,0 +1,63 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * itorder.c - Code to fix invalid patterns in        / / \  \
+ *             the pattern table.                    | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Julien Cugniere.                                \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+
+
+#include <stdlib.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* This function ensures that any pattern mentioned in the order table but
+ * not present in the pattern table is treated as an empty 64-row pattern.
+ * This is done by adding such a dummy pattern at the end of the pattern
+ * table and redirecting the invalid orders to it.
+ * Order values 254 and 255 are left untouched, unless the signal is an XM.
+ */
+int _dumb_it_fix_invalid_orders(DUMB_IT_SIGDATA *sigdata)
+{
+	const int dummy_index = sigdata->n_patterns;
+	const int highest_fixable = (sigdata->flags & IT_WAS_AN_XM) ? 255 : 253;
+	IT_PATTERN *grown;
+	int redirected = 0;
+	int pos;
+
+	/* Pass 1: point every out-of-range order at the slot just past the current pattern table. */
+	for (pos = 0; pos < sigdata->n_orders; pos++) {
+		if (sigdata->order[pos] < dummy_index || sigdata->order[pos] > highest_fixable)
+			continue;
+		sigdata->order[pos] = dummy_index;
+		redirected = 1;
+	}
+	/* Nothing was redirected, so no dummy pattern is needed. */
+	if (!redirected)
+		return 0;
+	/* Pass 2: append the empty 64-row pattern; the table pointer is replaced only on success (-1 otherwise). */
+	grown = realloc(sigdata->pattern, sizeof(*sigdata->pattern) * (dummy_index + 1));
+	if (!grown)
+		return -1;
+	grown[dummy_index].n_rows = 64;
+	grown[dummy_index].n_entries = 0;
+	grown[dummy_index].entry = NULL;
+	sigdata->pattern = grown;
+	sigdata->n_patterns++;
+	return 0;
+}
diff --git a/libraries/dumb/src/it/itread.c b/libraries/dumb/src/it/itread.c
new file mode 100644
index 000000000..a226c530f
--- /dev/null
+++ b/libraries/dumb/src/it/itread.c
@@ -0,0 +1,1414 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * itread.c - Code to read an Impulse Tracker         / / \  \
+ *            module from an open file.              | <  /   \_
+ *                                                   |  \/ /\   /
+ * Based on the loader from an IT player by Bob.      \_  /  > /
+ * Adapted for DUMB by entheh.                          | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>//might not be necessary later; required for memset
+
+#include "dumb.h"
+#include "internal/it.h"
+
+#ifndef min
+#define min(a, b) (((a) < (b)) ? (a) : (b)) /* note: each argument may be evaluated twice */
+#endif
+
+
+/* #define INVESTIGATE_OLD_INSTRUMENTS */ /* debug aid: when defined, it_read_old_instrument() dumps envelope nodes to stderr */
+
+
+
+typedef unsigned char byte; /* short integer aliases used by the sample decompressors in this file */
+typedef unsigned short word;
+typedef uint32 dword;
+
+typedef struct readblock_crap readblock_crap;
+
+struct readblock_crap { /* state of the compressed block currently being decoded */
+	unsigned char *sourcebuf; /* start of the malloc()ed block; released by freeblock() */
+	unsigned char *sourcepos; /* byte that readbits() consumes next */
+	unsigned char *sourceend; /* one past the last byte of the block */
+	int rembits; /* unread bits left in *sourcepos (8 for a fresh byte) */
+};
+
+
+static int readblock(DUMBFILE *f, readblock_crap * crap)
+{
+	/* Loads one compressed block: a length read with dumbfile_igetw(), then that many bytes. */
+	/* Returns 0 on success and a negative value on failure; no buffer stays allocated on failure. */
+	int32 nbytes = dumbfile_igetw(f);
+
+	if (nbytes < 0)
+		return nbytes;
+
+	crap->sourcebuf = malloc(nbytes);
+	if (!crap->sourcebuf)
+		return -1;
+
+	/* A short read means the block is truncated: drop the buffer again. */
+	if (dumbfile_getnc((char *)crap->sourcebuf, nbytes, f) < nbytes) {
+		free(crap->sourcebuf);
+		crap->sourcebuf = NULL;
+		return -1;
+	}
+	/* Start decoding at the first byte with all 8 of its bits unread. */
+	crap->sourcepos = crap->sourcebuf;
+	crap->sourceend = crap->sourcebuf + nbytes;
+	crap->rembits = 8;
+	return 0;
+}
+
+/* Releases the block buffer allocated by readblock(). */
+/* The pointer is reset to NULL, so calling this again is harmless. */
+static void freeblock(readblock_crap * crap)
+{
+	free(crap->sourcebuf);
+	crap->sourcebuf = NULL;
+}
+
+/* Extracts `bitwidth` bits from the current block, least significant bit first. */
+/* Once the block is exhausted, whatever has been gathered so far (possibly 0) is returned. */
+static int readbits(int bitwidth, readblock_crap * crap)
+{
+	int val = 0;
+	int b = 0; /* bit position in val where the next piece is placed */
+
+	if (crap->sourcepos >= crap->sourceend) return val;
+
+	while (bitwidth > crap->rembits) { /* the request extends past the current byte */
+		val |= *crap->sourcepos++ << b; /* take every bit still left in this byte; NOTE(review): undefined once b reaches the width of int */
+		if (crap->sourcepos >= crap->sourceend) return val;
+		b += crap->rembits;
+		bitwidth -= crap->rembits;
+		crap->rembits = 8;
+	}
+
+	val |= (*crap->sourcepos & ((1 << bitwidth) - 1)) << b;
+	*crap->sourcepos >>= bitwidth; /* drop the consumed bits so the byte holds only unread ones */
+	crap->rembits -= bitwidth;
+
+	return val;
+}
+
+/* decompress8(): decodes `len` IT-compressed 8-bit samples from f into `data`, */
+/* leaving `stereo` untouched slots after every stored sample. A non-zero `it215` */
+/* stores the second running sum (IT 2.15 variant) instead of the first. */
+/* Returns 0 on success, -1 if a block cannot be read or an illegal bit width occurs. */
+/* The destination slots are zeroed up front, so samples not reached stay 0. */
+static int decompress8(DUMBFILE *f, signed char *data, int len, int it215, int stereo)
+{
+	int blocklen, blockpos;
+	byte bitwidth;
+	word val;
+	signed char d1, d2; //explicitly signed: plain char may be unsigned on some targets
+	readblock_crap crap;
+
+	memset(&crap, 0, sizeof(crap));
+
+	for (blocklen = 0, blockpos = 0; blocklen < len; blocklen++, blockpos += 1 + stereo)
+		data[ blockpos ] = 0;
+
+	while (len > 0) {
+		//Read a block of compressed data:
+		if (readblock(f, &crap))
+			return -1;
+		//Set up a few variables
+		blocklen = (len < 0x8000) ? len : 0x8000; //Max block length is 0x8000 bytes
+		blockpos = 0;
+		bitwidth = 9;
+		d1 = d2 = 0;
+		//Start the decompression:
+		while (blockpos < blocklen) {
+			//Illegal width (0 or more than 9 bits): abort before it reaches any shift
+			if (bitwidth < 1 || bitwidth > 9) {
+				freeblock(&crap);
+				return -1;
+			}
+			//Read a value:
+			val = (word)readbits(bitwidth, &crap);
+			//Check for bit width change:
+			if (bitwidth < 7) { //Method 1:
+				if (val == (1 << (bitwidth - 1))) {
+					val = (word)readbits(3, &crap) + 1;
+					bitwidth = (val < bitwidth) ? val : val + 1;
+					continue;
+				}
+			}
+			else if (bitwidth < 9) { //Method 2
+				byte border = (0xFF >> (9 - bitwidth)) - 4;
+
+				if (val > border && val <= (border + 8)) {
+					val -= border;
+					bitwidth = (val < bitwidth) ? val : val + 1;
+					continue;
+				}
+			}
+			else { //Method 3 (bitwidth == 9)
+				if (val & 0x100) {
+					bitwidth = (val + 1) & 0xFF;
+					continue;
+				}
+			}
+
+			//Expand the value to signed byte:
+			{
+				signed char v; //The sample value:
+				if (bitwidth < 8) {
+					byte shift = 8 - bitwidth;
+					v = (signed char)(val << shift);
+					v >>= shift;
+				}
+				else
+					v = (signed char)val;
+
+				//And integrate the sample value
+				//(It always has to end with integration doesn't it ? ;-)
+				d1 += v;
+				d2 += d1;
+			}
+
+			//Store !
+			//Version 2.15 was an unofficial version with hacked compression
+			//code. Yay, better compression :D
+			//(hence the choice between the two running sums below)
+			*data++ = it215 ? d2 : d1;
+			data += stereo;
+			len--;
+			blockpos++;
+		}
+		freeblock(&crap);
+	}
+	return 0;
+}
+
+/* decompress16(): 16-bit counterpart of decompress8() - decodes `len` IT-compressed samples from f into `data`. */
+/* Returns 0 on success, -1 if a block cannot be read or an illegal bit width (0 or more than 17) occurs. */
+static int decompress16(DUMBFILE *f, short *data, int len, int it215, int stereo)
+{
+	int blocklen, blockpos;
+	byte bitwidth;
+	int32 val;
+	short d1, d2;
+	readblock_crap crap;
+
+	memset(&crap, 0, sizeof(crap));
+
+	for ( blocklen = 0, blockpos = 0; blocklen < len; blocklen++, blockpos += 1 + stereo )
+		data[ blockpos ] = 0;
+
+	while (len > 0) {
+		//Read a block of compressed data:
+		if (readblock(f, &crap))
+			return -1;
+		//Set up a few variables
+		blocklen = (len < 0x4000) ? len : 0x4000; // Max block length is 0x4000 samples
+		blockpos = 0;
+		bitwidth = 17;
+		d1 = d2 = 0;
+		//Start the decompression:
+		while (blockpos < blocklen) {
+			//Illegal width (0 or more than 17 bits): abort before it reaches any shift
+			if (bitwidth < 1 || bitwidth > 17) {
+				freeblock(&crap);
+				return -1;
+			}
+			val = readbits(bitwidth, &crap);
+			//Check for bit width change:
+			if (bitwidth < 7) { //Method 1:
+				if (val == (1 << (bitwidth - 1))) {
+					val = readbits(4, &crap) + 1;
+					bitwidth = (byte)((val < bitwidth) ? val : val + 1);
+					continue;
+				}
+			}
+			else if (bitwidth < 17) { //Method 2
+				word border = (0xFFFF >> (17 - bitwidth)) - 8;
+
+				if (val > border && val <= (border + 16)) {
+					val -= border;
+					bitwidth = (byte)(val < bitwidth ? val : val + 1);
+					continue;
+				}
+			}
+			else { //Method 3 (bitwidth == 17)
+				if (val & 0x10000) {
+					bitwidth = (byte)((val + 1) & 0xFF);
+					continue;
+				}
+			}
+
+			//Expand the value to a signed 16-bit sample:
+			{
+				short v; //The sample value:
+				if (bitwidth < 16) {
+					byte shift = 16 - bitwidth;
+					v = (short)(val << shift);
+					v >>= shift;
+				}
+				else
+					v = (short)val;
+
+				//And integrate the sample value
+				//(It always has to end with integration doesn't it ? ;-)
+				d1 += v;
+				d2 += d1;
+			}
+
+			//Store !
+			//Version 2.15 was an unofficial version with hacked compression
+			//code. Yay, better compression :D
+			//(hence the choice between the two running sums below)
+			*data++ = it215 ? d2 : d1;
+			data += stereo;
+			len--;
+			blockpos++;
+		}
+		freeblock(&crap);
+	}
+	return 0;
+}
+
+/* Reads one IT envelope record from f into *envelope and switches off flags that cannot apply. */
+/* Returns -1 if the node count exceeds 25, otherwise the result of dumbfile_error(f). */
+static int it_read_envelope(IT_ENVELOPE *envelope, DUMBFILE *f)
+{
+	int node;
+	envelope->flags = dumbfile_getc(f);
+	envelope->n_nodes = dumbfile_getc(f);
+	if (envelope->n_nodes > 25) {
+		TRACE("IT error: wrong number of envelope nodes (%d)\n", envelope->n_nodes);
+		envelope->n_nodes = 0;
+		return -1;
+	}
+	envelope->loop_start = dumbfile_getc(f);
+	envelope->loop_end = dumbfile_getc(f);
+	envelope->sus_loop_start = dumbfile_getc(f);
+	envelope->sus_loop_end = dumbfile_getc(f);
+	for (node = 0; node < envelope->n_nodes; node++) {
+		envelope->node_y[node] = dumbfile_getc(f);
+		envelope->node_t[node] = dumbfile_igetw(f);
+	}
+	/* Skip the unread part of the 75 node bytes (3 per node) plus one further byte. */
+	dumbfile_skip(f, 75 - envelope->n_nodes * 3 + 1);
+	if (envelope->n_nodes <= 0) {
+		envelope->flags &= ~IT_ENVELOPE_ON;
+		return dumbfile_error(f);
+	}
+	if (envelope->loop_start > envelope->loop_end || envelope->loop_end >= envelope->n_nodes)
+		envelope->flags &= ~IT_ENVELOPE_LOOP_ON;
+	if (envelope->sus_loop_start > envelope->sus_loop_end || envelope->sus_loop_end >= envelope->n_nodes)
+		envelope->flags &= ~IT_ENVELOPE_SUSTAIN_LOOP;
+	return dumbfile_error(f);
+}
+
+/* Reads an old-format IT instrument record from f into *instrument and fills in fixed defaults */
+/* for the fields this format does not carry. Returns 0 on success, -1 if dumbfile_error(f) is set. */
+static int it_read_old_instrument(IT_INSTRUMENT *instrument, DUMBFILE *f)
+{
+	int n;
+
+	/*if (dumbfile_mgetl(f) != IT_INSTRUMENT_SIGNATURE)
+		return -1;*/
+	// XXX
+	dumbfile_skip(f, 4); /* the 4 signature bytes are skipped, not verified (see the disabled check above) */
+
+    dumbfile_getnc((char *)instrument->filename, 13, f);
+	instrument->filename[13] = 0;
+
+	instrument->volume_envelope.flags = dumbfile_getc(f);
+	instrument->volume_envelope.loop_start = dumbfile_getc(f);
+	instrument->volume_envelope.loop_end = dumbfile_getc(f);
+	instrument->volume_envelope.sus_loop_start = dumbfile_getc(f);
+	instrument->volume_envelope.sus_loop_end = dumbfile_getc(f);
+
+	/* Skip two unused bytes. */
+	dumbfile_skip(f, 2);
+
+	/* In the old instrument format, fadeout ranges from 0 to 64, and is
+	 * subtracted at intervals from a value starting at 512. In the new
+	 * format, all these values are doubled. Therefore we double when loading
+	 * from the old instrument format - that way we don't have to think about
+	 * it later.
+	 */
+	instrument->fadeout = dumbfile_igetw(f) << 1;
+	instrument->new_note_action = dumbfile_getc(f);
+	instrument->dup_check_type = dumbfile_getc(f);
+	instrument->dup_check_action = DCA_NOTE_CUT; // This might be wrong!
+	/** WARNING - what is the duplicate check action for old-style instruments? */
+
+	/* Skip Tracker Version and Number of Samples. These are only used in
+	 * separate instrument files. Also skip unused byte.
+	 */
+	dumbfile_skip(f, 4);
+
+    dumbfile_getnc((char *)instrument->name, 26, f);
+	instrument->name[26] = 0;
+
+	/* Skip unused bytes following the Instrument Name. */
+	dumbfile_skip(f, 6);
+
+	instrument->pp_separation = 0; /* from here on: fixed defaults, nothing is read from the file */
+	instrument->pp_centre = 60;
+	instrument->global_volume = 128;
+	/** WARNING - should global_volume be 64 or something? */
+	instrument->default_pan = 32;
+	/** WARNING - should default_pan be 128, meaning don`t use? */
+	instrument->random_volume = 0;
+	instrument->random_pan = 0;
+
+	for (n = 0; n < 120; n++) { /* 120 note/sample pairs, two bytes each */
+		instrument->map_note[n] = dumbfile_getc(f);
+		instrument->map_sample[n] = dumbfile_getc(f);
+	}
+
+	/* Skip "Volume envelope (200 bytes)". */
+	// - need to know better what this is for though.
+	dumbfile_skip(f, 200);
+
+#ifdef INVESTIGATE_OLD_INSTRUMENTS
+	fprintf(stderr, "Inst %02d Env:", n); /* NOTE(review): n is 120 here (left over from the loop above), not an instrument number */
+#endif
+
+	for (n = 0; n < 25; n++)
+	{
+		instrument->volume_envelope.node_t[n] = dumbfile_getc(f);
+		instrument->volume_envelope.node_y[n] = dumbfile_getc(f);
+
+#ifdef INVESTIGATE_OLD_INSTRUMENTS
+		fprintf(stderr, " %d,%d",
+				instrument->volume_envelope.node_t[n],
+				instrument->volume_envelope.node_y[n]);
+#endif
+
+		// This loop is unfinished, as we can probably escape from it before
+		// the end if we want to. Hence the otherwise useless dumbfile_skip()
+		// call below.
+	}
+	dumbfile_skip(f, 50 - (n << 1)); /* n is always 25 here, so this currently skips 0 bytes */
+	instrument->volume_envelope.n_nodes = n;
+
+#ifdef INVESTIGATE_OLD_INSTRUMENTS
+	fprintf(stderr, "\n");
+#endif
+
+	if (dumbfile_error(f))
+		return -1;
+
+	{
+		IT_ENVELOPE *envelope = &instrument->volume_envelope;
+		if (envelope->n_nodes <= 0)
+			envelope->flags &= ~IT_ENVELOPE_ON;
+		else { /* switch off loops whose bounds fall outside the node list or are reversed */
+			if (envelope->loop_end >= envelope->n_nodes || envelope->loop_start > envelope->loop_end) envelope->flags &= ~IT_ENVELOPE_LOOP_ON;
+			if (envelope->sus_loop_end >= envelope->n_nodes || envelope->sus_loop_start > envelope->sus_loop_end) envelope->flags &= ~IT_ENVELOPE_SUSTAIN_LOOP;
+		}
+	}
+
+	instrument->filter_cutoff = 127;
+	instrument->filter_resonance = 0;
+
+	instrument->pan_envelope.flags = 0; /* only the flags of these two envelopes are set; both are cleared */
+	instrument->pitch_envelope.flags = 0;
+
+	return 0;
+}
+
+
+
+/* Read a new-format instrument header from f into *instrument. The loader
+ * uses this reader when the file's cmwt field is 0x200 or greater.
+ *
+ * instrument - destination structure.
+ * f          - input, positioned at the first byte of the header. The four
+ *              signature bytes are skipped, not verified.
+ * maxlen     - number of bytes from the start of this header to the next
+ *              component in the file, or 0 if that is unknown. It is used to
+ *              decide whether the optional block tagged IT_MPTX_SIGNATURE can
+ *              follow the three envelopes.
+ *
+ * When present, the optional block supplies the high byte of each of the 120
+ * sample-map entries, extending the sample numbers read from the main header
+ * beyond 255.
+ *
+ * Returns 0 on success (with or without the optional block), -1 on failure.
+ */
+static int it_read_instrument(IT_INSTRUMENT *instrument, DUMBFILE *f, int maxlen)
+{
+	int n, len = 0;
+
+	/* Remember where the header starts so that its size can be measured. */
+	if (maxlen) len = dumbfile_pos(f);
+
+	/* Skip the signature. */
+	dumbfile_skip(f, 4);
+
+	dumbfile_getnc((char *)instrument->filename, 13, f);
+	instrument->filename[13] = 0;
+
+	instrument->new_note_action = dumbfile_getc(f);
+	instrument->dup_check_type = dumbfile_getc(f);
+	instrument->dup_check_action = dumbfile_getc(f);
+	instrument->fadeout = dumbfile_igetw(f);
+	instrument->pp_separation = dumbfile_getc(f);
+	instrument->pp_centre = dumbfile_getc(f);
+	instrument->global_volume = dumbfile_getc(f);
+	instrument->default_pan = dumbfile_getc(f);
+	instrument->random_volume = dumbfile_getc(f);
+	instrument->random_pan = dumbfile_getc(f);
+
+	/* Skip Tracker Version and Number of Samples. These are only used in
+	 * separate instrument files. Also skip unused byte.
+	 */
+	dumbfile_skip(f, 4);
+
+	dumbfile_getnc((char *)instrument->name, 26, f);
+	instrument->name[26] = 0;
+
+	instrument->filter_cutoff = dumbfile_getc(f);
+	instrument->filter_resonance = dumbfile_getc(f);
+
+	/* Skip MIDI Channel, Program and Bank. */
+	dumbfile_skip(f, 4);
+
+	/* Note translation table: a (note, sample) pair for each of 120 notes. */
+	for (n = 0; n < 120; n++) {
+		instrument->map_note[n] = dumbfile_getc(f);
+		instrument->map_sample[n] = dumbfile_getc(f);
+	}
+
+	if (dumbfile_error(f))
+		return -1;
+
+	if (it_read_envelope(&instrument->volume_envelope, f)) return -1;
+	if (it_read_envelope(&instrument->pan_envelope, f)) return -1;
+	if (it_read_envelope(&instrument->pitch_envelope, f)) return -1;
+
+	if (maxlen) {
+		/* The optional block takes a 4-byte tag plus 120 data bytes. If
+		 * fewer bytes than that remain before the next component, it
+		 * cannot be there.
+		 */
+		len = dumbfile_pos(f) - len;
+		if (maxlen - len < 124) return 0;
+	}
+
+	if (dumbfile_mgetl(f) == IT_MPTX_SIGNATURE) {
+		for (n = 0; n < 120; n++) {
+			/* Test for failure before shifting: dumbfile_getc() reports
+			 * it with a negative value, and left-shifting a negative int
+			 * is undefined behaviour.
+			 */
+			int hi = dumbfile_getc(f);
+			if (hi < 0)
+				return -1;
+			instrument->map_sample[n] += hi << 8;
+		}
+
+		if (dumbfile_error(f))
+			return -1;
+	}
+
+	return 0;
+}
+
+
+
+/* Read one sample header from f into *sample. The header's conversion byte is
+ * stored in *convert and the file offset of the sample's data in *offset.
+ * Returns -1 if the signature cannot be completed, else the value of dumbfile_error(f). */
+static int it_read_sample_header(IT_SAMPLE *sample, unsigned char *convert, int32 *offset, DUMBFILE *f)
+{
+	int hax = 0; /* set when two more bytes were needed to complete the signature */
+	int32 s = dumbfile_mgetl(f);
+	if (s != IT_SAMPLE_SIGNATURE) {
+		if ( s == ( IT_SAMPLE_SIGNATURE >> 16 ) ) { /* only the signature's upper half was read, into the low 16 bits */
+			s <<= 16;
+			s |= dumbfile_mgetw(f);
+			if ( s != IT_SAMPLE_SIGNATURE )
+				return -1;
+			hax = 1;
+		}
+	} /* any other mismatch is not rejected: the fields are read regardless */
+
+    dumbfile_getnc((char *)sample->filename, 13, f);
+	sample->filename[13] = 0;
+
+	sample->global_volume = dumbfile_getc(f);
+	sample->flags = dumbfile_getc(f);
+	sample->default_volume = dumbfile_getc(f);
+
+    dumbfile_getnc((char *)sample->name, 26, f);
+	sample->name[26] = 0;
+
+	*convert = dumbfile_getc(f);
+	sample->default_pan = dumbfile_getc(f);
+	sample->length = dumbfile_igetl(f);
+	sample->loop_start = dumbfile_igetl(f);
+	sample->loop_end = dumbfile_igetl(f);
+	sample->C5_speed = dumbfile_igetl(f);
+	sample->sus_loop_start = dumbfile_igetl(f);
+	sample->sus_loop_end = dumbfile_igetl(f);
+
+#ifdef STEREO_SAMPLES_COUNT_AS_TWO
+	if (sample->flags & IT_SAMPLE_STEREO) {
+		sample->length >>= 1;
+		sample->loop_start >>= 1;
+		sample->loop_end >>= 1;
+		sample->C5_speed >>= 1;
+		sample->sus_loop_start >>= 1;
+		sample->sus_loop_end >>= 1;
+	}
+#endif
+	/* Drop the sample, or just its loops, when the values read are inconsistent. */
+	if (sample->flags & IT_SAMPLE_EXISTS) {
+		if (sample->length <= 0)
+			sample->flags &= ~IT_SAMPLE_EXISTS;
+		else {
+			if ((unsigned int)sample->loop_end > (unsigned int)sample->length)
+				sample->flags &= ~IT_SAMPLE_LOOP;
+			else if ((unsigned int)sample->loop_start >= (unsigned int)sample->loop_end)
+				sample->flags &= ~IT_SAMPLE_LOOP;
+
+			if ((unsigned int)sample->sus_loop_end > (unsigned int)sample->length)
+				sample->flags &= ~IT_SAMPLE_SUS_LOOP;
+			else if ((unsigned int)sample->sus_loop_start >= (unsigned int)sample->sus_loop_end)
+				sample->flags &= ~IT_SAMPLE_SUS_LOOP;
+
+			/* We may be able to truncate the sample to save memory. */
+			if (sample->flags & IT_SAMPLE_LOOP &&
+				*convert != 0xFF) { /* not truncating compressed samples, for now... */
+				if ((sample->flags & IT_SAMPLE_SUS_LOOP) && sample->sus_loop_end >= sample->loop_end)
+					sample->length = sample->sus_loop_end;
+				else
+					sample->length = sample->loop_end;
+			}
+		}
+	}
+
+	*offset = dumbfile_igetl(f);
+
+	sample->vibrato_speed = dumbfile_getc(f);
+	sample->vibrato_depth = dumbfile_getc(f);
+	if ( ! hax ) {
+		sample->vibrato_rate = dumbfile_getc(f);
+		sample->vibrato_waveform = dumbfile_getc(f);
+	} else { /* the last two header bytes are not read in this case */
+		sample->vibrato_rate = 0;
+		sample->vibrato_waveform = 0;
+	}
+	sample->finetune = 0;
+	sample->max_resampling_quality = -1;
+
+	return dumbfile_error(f);
+}
+
+/* Decode 4-bit delta-packed sample data from f into sample->data: a 16-entry
+ * delta table, then two deltas per byte (low nibble first). 0 = ok, -1 = error. */
+int32 _dumb_it_read_sample_data_adpcm4(IT_SAMPLE *sample, DUMBFILE *f)
+{
+	signed char deltas[16];
+	signed char *out = (signed char *) sample->data;
+	int32 remaining = sample->length; /* samples still to be written */
+	int32 value = 0; /* running sum of the deltas */
+
+	if (dumbfile_getnc((char *)deltas, 16, f) != 16)
+		return -1;
+	while (remaining > 0) {
+		int packed = dumbfile_getc(f);
+		if (packed < 0) return -1;
+		value += deltas[packed & 0x0F];
+		*out++ = (signed char)value;
+		if (--remaining == 0) /* odd length: the high nibble is not used */
+			break;
+		value += deltas[packed >> 4];
+		*out++ = (signed char)value;
+		remaining--;
+	}
+	return 0;
+}
+
+
+/* Allocate sample->data and fill it from f, which the caller has positioned at
+ * the sample's data; convert is the conversion byte from the sample header.
+ * Returns 0 on success, -1 on a bad length, allocation failure or read error. */
+static int32 it_read_sample_data(IT_SAMPLE *sample, unsigned char convert, DUMBFILE *f)
+{
+	int32 n;
+	int32 datasize = sample->length; /* number of values, counting both channels */
+
+	/* The length comes from the file. Refuse values that are negative or that
+	 * cannot be doubled for the second channel without overflowing int32. */
+	if (datasize < 0 || ((sample->flags & IT_SAMPLE_STEREO) && datasize > 0x3FFFFFFF))
+		return -1;
+	if (sample->flags & IT_SAMPLE_STEREO) datasize <<= 1;
+
+	/* Multiply in size_t so that the byte count cannot wrap around as an int. */
+	sample->data = malloc((size_t)datasize * (sample->flags & IT_SAMPLE_16BIT ? 2 : 1));
+	if (!sample->data) return -1;
+
+	if (!(sample->flags & IT_SAMPLE_16BIT) && (convert == 0xFF)) {
+		if (_dumb_it_read_sample_data_adpcm4(sample, f) < 0) return -1;
+	} else if (sample->flags & 8) {
+		/* Packed sample: unpack it. Behavior as defined by greasemonkey's munch.py and observed by XMPlay and OpenMPT */
+		if (sample->flags & IT_SAMPLE_STEREO) {
+			if (sample->flags & IT_SAMPLE_16BIT) {
+				decompress16(f, (short *) sample->data, datasize >> 1, convert & 4, 1);
+				decompress16(f, (short *) sample->data + 1, datasize >> 1, convert & 4, 1);
+			} else {
+				decompress8(f, (signed char *) sample->data, datasize >> 1, convert & 4, 1);
+				decompress8(f, (signed char *) sample->data + 1, datasize >> 1, convert & 4, 1);
+			}
+		} else {
+			if (sample->flags & IT_SAMPLE_16BIT)
+				decompress16(f, (short *) sample->data, datasize, convert & 4, 0);
+			else
+				decompress8(f, (signed char *) sample->data, datasize, convert & 4, 0);
+		}
+	} else if (sample->flags & IT_SAMPLE_16BIT) {
+		if (sample->flags & IT_SAMPLE_STEREO) { /* one channel after the other in the file, interleaved in memory */
+			if (convert & 2) {
+				for (n = 0; n < datasize; n += 2)
+					((short *)sample->data)[n] = dumbfile_mgetw(f);
+				for (n = 1; n < datasize; n += 2)
+					((short *)sample->data)[n] = dumbfile_mgetw(f);
+			} else {
+				for (n = 0; n < datasize; n += 2)
+					((short *)sample->data)[n] = dumbfile_igetw(f);
+				for (n = 1; n < datasize; n += 2)
+					((short *)sample->data)[n] = dumbfile_igetw(f);
+			}
+		} else {
+			if (convert & 2)
+				for (n = 0; n < datasize; n++)
+					((short *)sample->data)[n] = dumbfile_mgetw(f);
+			else
+				for (n = 0; n < datasize; n++)
+					((short *)sample->data)[n] = dumbfile_igetw(f);
+		}
+	} else {
+		if (sample->flags & IT_SAMPLE_STEREO) {
+			for (n = 0; n < datasize; n += 2)
+				((signed char *)sample->data)[n] = dumbfile_getc(f);
+			for (n = 1; n < datasize; n += 2)
+				((signed char *)sample->data)[n] = dumbfile_getc(f);
+		} else
+			for (n = 0; n < datasize; n++)
+				((signed char *)sample->data)[n] = dumbfile_getc(f);
+	}
+
+	if (dumbfile_error(f)) return -1;
+
+	if (!(convert & 1)) {
+		/* Convert to signed. */
+		if (sample->flags & IT_SAMPLE_16BIT)
+			for (n = 0; n < datasize; n++)
+				((short *)sample->data)[n] ^= 0x8000;
+		else
+			for (n = 0; n < datasize; n++)
+				((signed char *)sample->data)[n] ^= 0x80;
+	}
+
+	/* NOT SUPPORTED:
+	 * convert &  4 - Samples stored as delta values (only forwarded to the unpackers above)
+	 * convert & 16 - Samples stored as TX-Wave 12-bit values
+	 * convert & 32 - Left/Right/All Stereo prompt
+	 */
+	return 0;
+}
+
+
+
+//#define DETECT_DUPLICATE_CHANNELS
+#ifdef DETECT_DUPLICATE_CHANNELS
+#include <stdio.h>
+#endif
+/* Unpack one pattern from f into *pattern. The packed data is first read into
+ * buffer, which the caller provides; its length is a 16-bit field, so up to
+ * 65535 bytes are stored there. The data is then walked twice: once to count
+ * the entries so that they can be allocated in one go, once to fill them in.
+ * An entry cut short by the end of the data is dropped, with all that follows.
+ * Returns 0 on success and -1 on a read or allocation failure. */
+static int it_read_pattern(IT_PATTERN *pattern, DUMBFILE *f, unsigned char *buffer)
+{
+	/* Number of field bytes that follow, indexed by the low four mask bits. */
+	static const unsigned char used[16] = {0, 1, 1, 2, 1, 2, 2, 3, 2, 3, 3, 4, 3, 4, 4, 5};
+	/* Per-channel memory: the last mask byte and the last value of each field. */
+	unsigned char cmask[DUMB_IT_N_CHANNELS];
+	unsigned char cnote[DUMB_IT_N_CHANNELS];
+	unsigned char cinstrument[DUMB_IT_N_CHANNELS];
+	unsigned char cvolpan[DUMB_IT_N_CHANNELS];
+	unsigned char ceffect[DUMB_IT_N_CHANNELS];
+	unsigned char ceffectvalue[DUMB_IT_N_CHANNELS];
+#ifdef DETECT_DUPLICATE_CHANNELS
+	IT_ENTRY *dupentry[DUMB_IT_N_CHANNELS];
+#endif
+	int n_entries = 0;
+	int buflen;
+	int bufpos = 0;
+	IT_ENTRY *entry;
+	unsigned char channel;
+	unsigned char mask;
+
+	memset(cmask, 0, sizeof(cmask));
+	memset(cnote, 0, sizeof(cnote));
+	memset(cinstrument, 0, sizeof(cinstrument));
+	memset(cvolpan, 0, sizeof(cvolpan));
+	memset(ceffect, 0, sizeof(ceffect));
+	memset(ceffectvalue, 0, sizeof(ceffectvalue));
+#ifdef DETECT_DUPLICATE_CHANNELS
+	{
+		int i;
+		for (i = 0; i < DUMB_IT_N_CHANNELS; i++) dupentry[i] = NULL;
+	}
+#endif
+
+	buflen = dumbfile_igetw(f);
+	pattern->n_rows = dumbfile_igetw(f);
+
+	/* Skip four unused bytes. */
+	dumbfile_skip(f, 4);
+
+	if (dumbfile_error(f))
+		return -1;
+
+	/* Read in the pattern data. */
+	dumbfile_getnc((char *)buffer, buflen, f);
+
+	if (dumbfile_error(f))
+		return -1;
+
+	/* Scan the pattern data, and work out how many entries we need room for. If
+	 * an entry runs past the end of the data, end the data just before it. */
+	while (bufpos < buflen) {
+		int start = bufpos;
+		unsigned char b = buffer[bufpos++];
+		if (b == 0) {
+			/* End of row */
+			n_entries++;
+			continue;
+		}
+		channel = (b - 1) & 63;
+		if (b & 128) {
+			if (bufpos >= buflen) { buflen = start; break; } /* mask byte missing */
+			cmask[channel] = mask = buffer[bufpos++];
+		} else
+			mask = cmask[channel];
+		if (bufpos + used[mask & 15] > buflen) { buflen = start; break; } /* field bytes missing */
+		n_entries += (mask != 0);
+		bufpos += used[mask & 15];
+	}
+
+	pattern->n_entries = n_entries;
+	/* malloc(0) may return NULL, which is not a failure: an empty pattern has no entry array. */
+	if (n_entries == 0) {
+		pattern->entry = NULL;
+		return 0;
+	}
+	pattern->entry = malloc(n_entries * sizeof(*pattern->entry));
+	if (!pattern->entry)
+		return -1;
+
+	bufpos = 0;
+	memset(cmask, 0, sizeof(cmask));
+	entry = pattern->entry;
+
+	while (bufpos < buflen) {
+		unsigned char b = buffer[bufpos++];
+		if (b == 0) {
+			/* End of row */
+			IT_SET_END_ROW(entry);
+			entry++;
+#ifdef DETECT_DUPLICATE_CHANNELS
+			{
+				int i;
+				for (i = 0; i < DUMB_IT_N_CHANNELS; i++) dupentry[i] = NULL;
+			}
+#endif
+			continue;
+		}
+		channel = (b - 1) & 63;
+		if (b & 128)
+			cmask[channel] = mask = buffer[bufpos++];
+		else
+			mask = cmask[channel];
+		if (mask) {
+			/* Low nibble: field present. High nibble: reuse the last value. */
+			entry->mask = (mask & 15) | (mask >> 4);
+			entry->channel = channel;
+
+			if (mask & IT_ENTRY_NOTE)
+				cnote[channel] = entry->note = buffer[bufpos++];
+			else if (mask & (IT_ENTRY_NOTE << 4))
+				entry->note = cnote[channel];
+
+			if (mask & IT_ENTRY_INSTRUMENT)
+				cinstrument[channel] = entry->instrument = buffer[bufpos++];
+			else if (mask & (IT_ENTRY_INSTRUMENT << 4))
+				entry->instrument = cinstrument[channel];
+
+			if (mask & IT_ENTRY_VOLPAN)
+				cvolpan[channel] = entry->volpan = buffer[bufpos++];
+			else if (mask & (IT_ENTRY_VOLPAN << 4))
+				entry->volpan = cvolpan[channel];
+
+			if (mask & IT_ENTRY_EFFECT) {
+				ceffect[channel] = entry->effect = buffer[bufpos++];
+				ceffectvalue[channel] = entry->effectvalue = buffer[bufpos++];
+			} else {
+				entry->effect = ceffect[channel];
+				entry->effectvalue = ceffectvalue[channel];
+			}
+
+#ifdef DETECT_DUPLICATE_CHANNELS
+			if (dupentry[channel]) {
+				FILE *f = fopen("dupentry.txt", "a");
+				if (!f) abort();
+				fprintf(f, "Two events on channel %d:", channel);
+				fprintf(f, "  Event #1:");
+				if (dupentry[channel]->mask & IT_ENTRY_NOTE      ) fprintf(f, " %03d", dupentry[channel]->note      ); else fprintf(f, " ...");
+				if (dupentry[channel]->mask & IT_ENTRY_INSTRUMENT) fprintf(f, " %03d", dupentry[channel]->instrument); else fprintf(f, " ...");
+				if (dupentry[channel]->mask & IT_ENTRY_VOLPAN    ) fprintf(f, " %03d", dupentry[channel]->volpan    ); else fprintf(f, " ...");
+				if (dupentry[channel]->mask & IT_ENTRY_EFFECT) fprintf(f, " %c%02X\n", 'A' - 1 + dupentry[channel]->effect, dupentry[channel]->effectvalue); else fprintf(f, " ...\n");
+				fprintf(f, "  Event #2:");
+				if (entry->mask & IT_ENTRY_NOTE      ) fprintf(f, " %03d", entry->note      ); else fprintf(f, " ...");
+				if (entry->mask & IT_ENTRY_INSTRUMENT) fprintf(f, " %03d", entry->instrument); else fprintf(f, " ...");
+				if (entry->mask & IT_ENTRY_VOLPAN    ) fprintf(f, " %03d", entry->volpan    ); else fprintf(f, " ...");
+				if (entry->mask & IT_ENTRY_EFFECT) fprintf(f, " %c%02X\n", 'A' - 1 + entry->effect, entry->effectvalue); else fprintf(f, " ...\n");
+				fclose(f);
+			}
+			dupentry[channel] = entry;
+#endif
+
+			entry++;
+		}
+	}
+	ASSERT(entry == pattern->entry + n_entries);
+	return 0;
+}
+
+
+
+/* Currently we assume the sample data are stored after the sample headers in
+ * module files. This assumption may be unjustified; let me know if you have
+ * trouble.
+ */
+
+#define IT_COMPONENT_SONG_MESSAGE 1 /* values for IT_COMPONENT.type */
+#define IT_COMPONENT_INSTRUMENT   2
+#define IT_COMPONENT_PATTERN      3
+#define IT_COMPONENT_SAMPLE       4
+/* One item to be loaded from the file. The loader sorts these by offset. */
+typedef struct IT_COMPONENT
+{
+	unsigned char type; /* one of the IT_COMPONENT_* values */
+	unsigned short n; /* index into the instrument, sample or pattern array; not set for the song message */
+	int32 offset; /* file offset; for a sample, replaced by the offset of its data once the header is read */
+	short sampfirst; /* component[sampfirst] = first sample data after this; negative = none */
+	short sampnext; /* sampnext is used to create linked lists of sample data */
+}
+IT_COMPONENT;
+
+
+
+static int CDECL it_component_compare(const void *e1, const void *e2)
+{	/* qsort() comparator: ascending offset. Compare, never subtract: the difference of two file-supplied offsets can overflow. */
+	const int32 a = ((const IT_COMPONENT *)e1)->offset, b = ((const IT_COMPONENT *)e2)->offset;
+	return (a > b) - (a < b);
+}
+
+
+
+/* Load an Impulse Tracker module from f. Returns a newly allocated
+ * DUMB_IT_SIGDATA, or NULL if the signature is wrong, the header fails the
+ * sanity checks, a read or seek fails, or memory runs out. After the header,
+ * the song message, instruments, samples and patterns are loaded in order of
+ * file offset; trailing MPTX extension data is then scanned for a tempo. */
+static sigdata_t *it_load_sigdata(DUMBFILE *f)
+{
+	DUMB_IT_SIGDATA *sigdata;
+
+	int cwt, cmwt;
+	int special;
+	int message_length, message_offset;
+
+	IT_COMPONENT *component;
+	int n_components = 0;
+
+	unsigned char sample_convert[4096]; /* conversion byte of each sample header, indexed by sample */
+
+	int n;
+
+	unsigned char *buffer;
+
+	if (dumbfile_mgetl(f) != IT_SIGNATURE)
+    {
+		return NULL;
+    }
+
+	sigdata = malloc(sizeof(*sigdata));
+
+	if (!sigdata)
+    {
+		return NULL;
+    }
+
+	sigdata->song_message = NULL;
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->sample = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+    dumbfile_getnc((char *)sigdata->name, 26, f);
+	sigdata->name[26] = 0;
+
+	/* Skip pattern row highlight info. */
+	dumbfile_skip(f, 2);
+
+	sigdata->n_orders = dumbfile_igetw(f);
+	sigdata->n_instruments = dumbfile_igetw(f);
+	sigdata->n_samples = dumbfile_igetw(f);
+	sigdata->n_patterns = dumbfile_igetw(f);
+
+	cwt = dumbfile_igetw(f);
+	cmwt = dumbfile_igetw(f);
+
+	sigdata->flags = dumbfile_igetw(f);
+	special = dumbfile_igetw(f);
+
+	sigdata->global_volume = dumbfile_getc(f);
+	sigdata->mixing_volume = dumbfile_getc(f);
+	sigdata->speed = dumbfile_getc(f);
+	if (sigdata->speed == 0) sigdata->speed = 6; // Should we? What about tempo?
+	sigdata->tempo = dumbfile_getc(f);
+	sigdata->pan_separation = dumbfile_getc(f); /** WARNING: use this */
+
+	/* Skip Pitch Wheel Depth */
+	dumbfile_skip(f, 1);
+
+	message_length = dumbfile_igetw(f);
+	message_offset = dumbfile_igetl(f);
+
+	/* Skip Reserved. */
+	dumbfile_skip(f, 4);
+
+    dumbfile_getnc((char *)sigdata->channel_pan, DUMB_IT_N_CHANNELS, f);
+    dumbfile_getnc((char *)sigdata->channel_volume, DUMB_IT_N_CHANNELS, f);
+
+	// XXX sample count
+	if (dumbfile_error(f) || sigdata->n_orders <= 0 || sigdata->n_instruments > 256 || sigdata->n_samples > 4000 || sigdata->n_patterns > 256) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	sigdata->order = malloc(sigdata->n_orders);
+	if (!sigdata->order) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	if (sigdata->n_instruments) {
+		sigdata->instrument = malloc(sigdata->n_instruments * sizeof(*sigdata->instrument));
+		if (!sigdata->instrument) {
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+	}
+
+	if (sigdata->n_samples) {
+		sigdata->sample = malloc(sigdata->n_samples * sizeof(*sigdata->sample));
+		if (!sigdata->sample) {
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+		for (n = 0; n < sigdata->n_samples; n++)
+			sigdata->sample[n].data = NULL;
+	}
+
+	if (sigdata->n_patterns) {
+		sigdata->pattern = malloc(sigdata->n_patterns * sizeof(*sigdata->pattern));
+		if (!sigdata->pattern) {
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+		for (n = 0; n < sigdata->n_patterns; n++)
+			sigdata->pattern[n].entry = NULL;
+	}
+
+    dumbfile_getnc((char *)sigdata->order, sigdata->n_orders, f);
+	sigdata->restart_position = 0;
+	/* One slot per instrument, sample and pattern, plus one for the song message. */
+	component = malloc((1 + sigdata->n_instruments + sigdata->n_samples + sigdata->n_patterns) * sizeof(*component));
+	if (!component) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	if (special & 1) {
+		component[n_components].type = IT_COMPONENT_SONG_MESSAGE;
+		component[n_components].offset = message_offset;
+		component[n_components].sampfirst = -1;
+		n_components++;
+	}
+
+	for (n = 0; n < sigdata->n_instruments; n++) {
+		component[n_components].type = IT_COMPONENT_INSTRUMENT;
+		component[n_components].n = n;
+		component[n_components].offset = dumbfile_igetl(f);
+		component[n_components].sampfirst = -1;
+		n_components++;
+	}
+
+	for (n = 0; n < sigdata->n_samples; n++) {
+		component[n_components].type = IT_COMPONENT_SAMPLE;
+		component[n_components].n = n;
+		component[n_components].offset = dumbfile_igetl(f);
+		component[n_components].sampfirst = -1;
+		n_components++;
+	}
+
+	for (n = 0; n < sigdata->n_patterns; n++) {
+		int32 offset = dumbfile_igetl(f);
+		if (offset) {
+			component[n_components].type = IT_COMPONENT_PATTERN;
+			component[n_components].n = n;
+			component[n_components].offset = offset;
+			component[n_components].sampfirst = -1;
+			n_components++;
+		} else {
+			/* Empty 64-row pattern */
+			sigdata->pattern[n].n_rows = 64;
+			sigdata->pattern[n].n_entries = 0;
+		}
+	}
+
+	if (dumbfile_error(f)) {
+		free(component);
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	if (special & 8) {
+		/* MIDI configuration is embedded. */
+		unsigned char mididata[32];
+		int i;
+		sigdata->midi = malloc(sizeof(*sigdata->midi));
+		if (!sigdata->midi) {
+			free(component);
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+			// Should we be happy with this outcome in some situations?
+		}
+		// What are we skipping?
+		i = dumbfile_igetw(f);
+		if (dumbfile_error(f) || dumbfile_skip(f, 8*i)) {
+			free(component);
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+		/* Read embedded MIDI configuration */
+		// What are the first 9 commands for?
+		if (dumbfile_skip(f, 32*9)) {
+			free(component);
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+		for (i = 0; i < 16; i++) {
+			unsigned char len = 0;
+			int j, leftdigit = -1;
+            if (dumbfile_getnc((char *)mididata, 32, f) < 32) {
+				free(component);
+				_dumb_it_unload_sigdata(sigdata);
+				return NULL;
+			}
+			sigdata->midi->SFmacroz[i] = 0;
+			for (j = 0; j < 32; j++) {
+				if (leftdigit >= 0) {
+					if (mididata[j] == 0) {
+						sigdata->midi->SFmacro[i][len++] = leftdigit;
+						break;
+					} else if (mididata[j] == ' ')
+						sigdata->midi->SFmacro[i][len++] = leftdigit;
+					else if (mididata[j] >= '0' && mididata[j] <= '9')
+						sigdata->midi->SFmacro[i][len++] = (leftdigit << 4) | (mididata[j] - '0');
+					else if (mididata[j] >= 'A' && mididata[j] <= 'F')
+						sigdata->midi->SFmacro[i][len++] = (leftdigit << 4) | (mididata[j] - 'A' + 0xA);
+					leftdigit = -1;
+				} else if (mididata[j] == 0)
+					break;
+				else if (mididata[j] == 'z')
+					sigdata->midi->SFmacroz[i] |= 1 << len++;
+				else if (mididata[j] >= '0' && mididata[j] <= '9')
+					leftdigit = mididata[j] - '0';
+				else if (mididata[j] >= 'A' && mididata[j] <= 'F')
+					leftdigit = mididata[j] - 'A' + 0xA;
+			}
+			sigdata->midi->SFmacrolen[i] = len;
+		}
+		for (i = 0; i < 128; i++) {
+			unsigned char len = 0;
+			int j, leftdigit = -1;
+			if (dumbfile_getnc((char *)mididata, 32, f) < 32) {
+				free(component);
+				_dumb_it_unload_sigdata(sigdata);
+				return NULL;
+			}
+			for (j = 0; j < 32; j++) {
+				if (leftdigit >= 0) {
+					if (mididata[j] == 0) {
+						sigdata->midi->Zmacro[i][len++] = leftdigit;
+						break;
+					} else if (mididata[j] == ' ')
+						sigdata->midi->Zmacro[i][len++] = leftdigit;
+					else if (mididata[j] >= '0' && mididata[j] <= '9')
+						sigdata->midi->Zmacro[i][len++] = (leftdigit << 4) | (mididata[j] - '0');
+					else if (mididata[j] >= 'A' && mididata[j] <= 'F')
+						sigdata->midi->Zmacro[i][len++] = (leftdigit << 4) | (mididata[j] - 'A' + 0xA);
+					leftdigit = -1;
+				} else if (mididata[j] == 0)
+					break;
+				else if (mididata[j] >= '0' && mididata[j] <= '9')
+					leftdigit = mididata[j] - '0';
+				else if (mididata[j] >= 'A' && mididata[j] <= 'F')
+					leftdigit = mididata[j] - 'A' + 0xA;
+			}
+			sigdata->midi->Zmacrolen[i] = len;
+		}
+	}
+
+	sigdata->flags &= IT_REAL_FLAGS;
+
+    qsort(component, n_components, sizeof(IT_COMPONENT), &it_component_compare); /* load in file order */
+
+	buffer = malloc(65536);
+	if (!buffer) {
+		free(component);
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	for (n = 0; n < n_components; n++) {
+		int32 offset;
+		int m;
+
+		/* A zero offset means the component is absent: clear it instead of loading it. */
+		if ( component[n].offset == 0 ) {
+			switch (component[n].type) {
+				case IT_COMPONENT_INSTRUMENT:
+					memset( &sigdata->instrument[component[n].n], 0, sizeof(IT_INSTRUMENT) );
+					break;
+				case IT_COMPONENT_SAMPLE:
+					memset( &sigdata->sample[component[n].n], 0, sizeof(IT_SAMPLE) );
+					break;
+				case IT_COMPONENT_PATTERN:
+					{
+						IT_PATTERN * p = &sigdata->pattern[component[n].n];
+						p->entry = 0;
+						p->n_rows = 64;
+						p->n_entries = 0;
+					}
+					break;
+			}
+			continue;
+		}
+
+        if (dumbfile_seek(f, component[n].offset, DFS_SEEK_SET)) {
+			free(buffer);
+			free(component);
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+
+		switch (component[n].type) {
+
+			case IT_COMPONENT_SONG_MESSAGE:
+				if ( n + 1 < n_components ) { /* clip to the next component; the message may be the last one */
+					message_length = min( message_length, component[n+1].offset - component[n].offset );
+				}
+				sigdata->song_message = malloc(message_length + 1);
+				if (sigdata->song_message) {
+                    if (dumbfile_getnc((char *)sigdata->song_message, message_length, f) < message_length) {
+						free(buffer);
+						free(component);
+						_dumb_it_unload_sigdata(sigdata);
+						return NULL;
+					}
+					sigdata->song_message[message_length] = 0;
+				}
+				break;
+
+			case IT_COMPONENT_INSTRUMENT:
+				if (cmwt < 0x200)
+					m = it_read_old_instrument(&sigdata->instrument[component[n].n], f);
+				else
+					m = it_read_instrument(&sigdata->instrument[component[n].n], f, (n + 1 < n_components) ? (component[n+1].offset - component[n].offset) : 0);
+
+				if (m) {
+					free(buffer);
+					free(component);
+					_dumb_it_unload_sigdata(sigdata);
+					return NULL;
+				}
+				break;
+
+			case IT_COMPONENT_PATTERN:
+				if (it_read_pattern(&sigdata->pattern[component[n].n], f, buffer)) {
+					free(buffer);
+					free(component);
+					_dumb_it_unload_sigdata(sigdata);
+					return NULL;
+				}
+				break;
+
+			case IT_COMPONENT_SAMPLE:
+				if (it_read_sample_header(&sigdata->sample[component[n].n], &sample_convert[component[n].n], &offset, f)) {
+					free(buffer);
+					free(component);
+					_dumb_it_unload_sigdata(sigdata);
+					return NULL;
+				}
+
+				if (sigdata->sample[component[n].n].flags & IT_SAMPLE_EXISTS) {
+					short *sample;
+					/* Attach the data to the last component, from this one on, that starts at or before it; each list stays sorted by offset. */
+					for (m = n + 1; m < n_components; m++)
+						if (component[m].offset > offset)
+							break;
+					m--;
+
+					sample = &component[m].sampfirst;
+
+					while (*sample >= 0 && component[*sample].offset <= offset)
+						sample = &component[*sample].sampnext;
+
+					component[n].sampnext = *sample;
+					*sample = n;
+
+					component[n].offset = offset;
+				}
+		}
+		/* Load the sample data attached to this component, if any. */
+		m = component[n].sampfirst;
+
+		while (m >= 0) {
+            if (dumbfile_seek(f, component[m].offset, DFS_SEEK_SET)) {
+				free(buffer);
+				free(component);
+				_dumb_it_unload_sigdata(sigdata);
+				return NULL;
+			}
+
+			if (it_read_sample_data(&sigdata->sample[component[m].n], sample_convert[component[m].n], f)) {
+				free(buffer);
+				free(component);
+				_dumb_it_unload_sigdata(sigdata);
+				return NULL;
+			}
+
+			m = component[m].sampnext;
+		}
+    }
+    /* Make up to ten attempts to find the bytes 'X','T','P','M' from the current position. */
+    for ( n = 0; n < 10; n++ )
+    {
+        if ( dumbfile_getc( f ) == 'X' )
+        {
+            if ( dumbfile_getc( f ) == 'T' )
+            {
+                if ( dumbfile_getc( f ) == 'P' )
+                {
+                    if ( dumbfile_getc( f ) == 'M' )
+                    {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    if ( !dumbfile_error( f ) && n < 10 )
+    {
+        unsigned int mptx_id = dumbfile_igetl( f );
+        while ( !dumbfile_error( f ) && mptx_id != DUMB_ID('M','P','T','S') )
+        {
+            unsigned int size = dumbfile_igetw( f );
+            switch (mptx_id)
+            {
+            /* TODO: Add instrument extension readers */
+
+            default:
+                dumbfile_skip(f, size * sigdata->n_instruments);
+                break;
+            }
+
+            mptx_id = dumbfile_igetl( f );
+        }
+
+        mptx_id = dumbfile_igetl( f );
+        while ( !dumbfile_error(f) && dumbfile_pos(f) < dumbfile_get_size(f) )
+        {
+            unsigned int size = dumbfile_igetw( f );
+            switch (mptx_id)
+            {
+            /* TODO: Add more song extension readers */
+
+            case DUMB_ID('D','T','.','.'):
+                if ( size == 2 )
+                    sigdata->tempo = dumbfile_igetw( f );
+                else if ( size == 4 )
+                    sigdata->tempo = dumbfile_igetl( f );
+                break;
+
+            default:
+                dumbfile_skip(f, size);
+                break;
+            }
+            mptx_id = dumbfile_igetl( f );
+        }
+    }
+
+    free(buffer);
+	free(component);
+
+	_dumb_it_fix_invalid_orders(sigdata);
+
+	return sigdata;
+}
+
+
+
+/* Read an IT module from f and wrap it in a DUH carrying TITLE and FORMAT tags.
+ * Returns NULL if loading fails; otherwise whatever make_duh() returns. */
+DUH *DUMBEXPORT dumb_read_it_quick(DUMBFILE *f)
+{
+	DUH_SIGTYPE_DESC *descptr = &_dumb_sigtype_it;
+	sigdata_t *sigdata = it_load_sigdata(f);
+	const char *tag[2][2];
+
+	if (sigdata == NULL)
+		return NULL;
+
+	/* Each row of tag pairs a key with its value. */
+	tag[0][0] = "TITLE";
+	tag[0][1] = (const char *)(((DUMB_IT_SIGDATA *)sigdata)->name);
+	tag[1][0] = "FORMAT";
+	tag[1][1] = "IT";
+
+	/* Hand over the tags, the signal type descriptor and the signal data. */
+	return make_duh(-1, 2, (const char *const (*)[2])tag, 1, &descptr, &sigdata);
+}
diff --git a/libraries/dumb/src/it/itread2.c b/libraries/dumb/src/it/itread2.c
new file mode 100644
index 000000000..718565729
--- /dev/null
+++ b/libraries/dumb/src/it/itread2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * itread2.c - Function to read an Impulse Tracker    / / \  \
+ *             module from an open file and do an    | <  /   \_
+ *             initial run-through.                  |  \/ /\   /
+ *                                                    \_  /  > /
+ * Split off from itread.c by entheh.                   | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+
+DUH *DUMBEXPORT dumb_read_it(DUMBFILE *f)
+{	/* Read an IT module from f, then do the initial run-through on it. */
+	DUH *duh = dumb_read_it_quick(f); /* loads the module without any run-through */
+	dumb_it_do_initial_runthrough(duh); /* NOTE(review): also reached when loading failed and duh is NULL - confirm the callee accepts that */
+	return duh;
+}
diff --git a/libraries/dumb/src/it/itrender.c b/libraries/dumb/src/it/itrender.c
new file mode 100644
index 000000000..f9dc268e5
--- /dev/null
+++ b/libraries/dumb/src/it/itrender.c
@@ -0,0 +1,5961 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * itrender.c - Code to render an Impulse Tracker     / / \  \
+ *              module.                              | <  /   \_
+ *                                                   |  \/ /\   /
+ * Written - painstakingly - by entheh.               \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "dumb.h"
+#include "internal/dumb.h"
+#include "internal/it.h"
+#include "internal/lpc.h"
+
+#include "internal/resampler.h"
+#include "internal/mulsc.h"
+
+// #define BIT_ARRAY_BULLSHIT
+
+static IT_PLAYING *new_playing(DUMB_IT_SIGRENDERER *itsr)
+{
+	IT_PLAYING *voice = itsr->free_playing;
+
+	/* Prefer a recycled entry: pop the head of the renderer's free list. */
+	if (voice) {
+		itsr->free_playing = voice->next;
+		return voice;
+	}
+
+	/* Free list empty: allocate a fresh entry and create its two FIR resamplers. */
+	voice = malloc(sizeof(*voice));
+	if (!voice)
+		return NULL;
+
+	voice->resampler.fir_resampler_ratio = 0.0;
+	voice->resampler.fir_resampler[0] = resampler_create();
+	if (voice->resampler.fir_resampler[0]) {
+		voice->resampler.fir_resampler[1] = resampler_create();
+		if (voice->resampler.fir_resampler[1])
+			return voice;
+		/* Second resampler could not be created: release the first one too. */
+		resampler_delete(voice->resampler.fir_resampler[0]);
+	}
+
+	free(voice);
+	return NULL;
+}
+
+static void free_playing(DUMB_IT_SIGRENDERER *itsr, IT_PLAYING *playing)
+{
+	IT_PLAYING **head = &itsr->free_playing; /* the renderer's list of recycled entries */
+	playing->next = *head; /* push 'playing' on the front; no memory is released here */
+	*head = playing;
+}
+
+static void free_playing_orig(IT_PLAYING * r)
+{
+	int i; /* destroy both FIR resamplers (index 1, then 0) before the entry itself */
+	for (i = 1; i >= 0; i--)
+		resampler_delete(r->resampler.fir_resampler[i]);
+	free(r);
+}
+
+static IT_PLAYING *dup_playing(IT_PLAYING *src, IT_CHANNEL *dstchannel, IT_CHANNEL *srcchannel)
+{
+	IT_PLAYING *copy;
+	/* A NULL source simply duplicates to NULL. */
+	if (src == NULL) return NULL;
+
+	copy = malloc(sizeof(IT_PLAYING));
+	if (copy == NULL) return NULL;
+
+	copy->flags = src->flags;
+	copy->resampling_quality = src->resampling_quality;
+	/* Point at the channel with the same index, but inside the destination array. */
+	ASSERT(src->channel);
+	copy->channel = dstchannel + (src->channel - srcchannel);
+	copy->sample = src->sample;
+	copy->instrument = src->instrument;
+	copy->env_instrument = src->env_instrument;
+
+	copy->sampnum = src->sampnum;
+	copy->instnum = src->instnum;
+
+	copy->declick_stage = src->declick_stage;
+
+	copy->float_volume[0] = src->float_volume[0];
+	copy->float_volume[1] = src->float_volume[1];
+
+	copy->ramp_volume[0] = src->ramp_volume[0];
+	copy->ramp_volume[1] = src->ramp_volume[1];
+
+	copy->ramp_delta[0] = src->ramp_delta[0];
+	copy->ramp_delta[1] = src->ramp_delta[1];
+
+	copy->channel_volume = src->channel_volume;
+
+	copy->volume = src->volume;
+	copy->pan = src->pan;
+
+	copy->volume_offset = src->volume_offset;
+	copy->panning_offset = src->panning_offset;
+
+	copy->note = src->note;
+
+	copy->enabled_envelopes = src->enabled_envelopes;
+
+	copy->filter_cutoff = src->filter_cutoff;
+	copy->filter_resonance = src->filter_resonance;
+
+	copy->true_filter_cutoff = src->true_filter_cutoff;
+	copy->true_filter_resonance = src->true_filter_resonance;
+
+	copy->vibrato_speed = src->vibrato_speed;
+	copy->vibrato_depth = src->vibrato_depth;
+	copy->vibrato_n = src->vibrato_n;
+	copy->vibrato_time = src->vibrato_time;
+	copy->vibrato_waveform = src->vibrato_waveform;
+
+	copy->tremolo_speed = src->tremolo_speed;
+	copy->tremolo_depth = src->tremolo_depth;
+	copy->tremolo_time = src->tremolo_time;
+	copy->tremolo_waveform = src->tremolo_waveform;
+
+	copy->panbrello_speed = src->panbrello_speed;
+	copy->panbrello_depth = src->panbrello_depth;
+	copy->panbrello_time = src->panbrello_time;
+	copy->panbrello_waveform = src->panbrello_waveform;
+	copy->panbrello_random = src->panbrello_random;
+
+	copy->sample_vibrato_time = src->sample_vibrato_time;
+	copy->sample_vibrato_waveform = src->sample_vibrato_waveform;
+	copy->sample_vibrato_depth = src->sample_vibrato_depth;
+
+	copy->slide = src->slide;
+	copy->delta = src->delta;
+	copy->finetune = src->finetune;
+
+	copy->volume_envelope = src->volume_envelope;
+	copy->pan_envelope = src->pan_envelope;
+	copy->pitch_envelope = src->pitch_envelope;
+
+	copy->fadeoutcount = src->fadeoutcount;
+
+	copy->filter_state[0] = src->filter_state[0];
+	copy->filter_state[1] = src->filter_state[1];
+	/* Copy the resampler state wholesale, then give the copy its own FIR resamplers. */
+	copy->resampler = src->resampler;
+	copy->resampler.pickup_data = copy;
+	copy->resampler.fir_resampler_ratio = src->resampler.fir_resampler_ratio;
+	copy->resampler.fir_resampler[0] = resampler_dup( src->resampler.fir_resampler[0] );
+	if ( copy->resampler.fir_resampler[0] == NULL ) {
+		free( copy );
+		return NULL;
+	}
+	copy->resampler.fir_resampler[1] = resampler_dup( src->resampler.fir_resampler[1] );
+	if ( copy->resampler.fir_resampler[1] == NULL ) {
+		resampler_delete( copy->resampler.fir_resampler[0] );
+		free( copy );
+		return NULL;
+	}
+	copy->time_lost = src->time_lost;
+
+	//copy->output = src->output;
+
+	return copy;
+}
+
+/* Copies one channel's state from src into dst, field by field. The playing */
+/* entry is not shared: dst receives its own duplicate made by dup_playing(). */
+static void dup_channel(IT_CHANNEL *dst, IT_CHANNEL *src)
+{
+	dst->flags = src->flags;
+
+	dst->volume = src->volume;
+	dst->volslide = src->volslide;
+	dst->xm_volslide = src->xm_volslide;
+	dst->panslide = src->panslide;
+
+	dst->pan = src->pan;
+	dst->truepan = src->truepan;
+
+	dst->channelvolume = src->channelvolume;
+	dst->channelvolslide = src->channelvolslide;
+
+	dst->instrument = src->instrument;
+	dst->note = src->note;
+
+	dst->SFmacro = src->SFmacro;
+
+	dst->filter_cutoff = src->filter_cutoff;
+	dst->filter_resonance = src->filter_resonance;
+
+	dst->key_off_count = src->key_off_count;
+	dst->note_cut_count = src->note_cut_count;
+	dst->note_delay_count = src->note_delay_count;
+	dst->note_delay_entry = src->note_delay_entry;
+
+	dst->new_note_action = src->new_note_action;
+	/* arpeggio_table is assigned directly; arpeggio_offsets is an array and is copied whole. */
+	dst->arpeggio_table = src->arpeggio_table;
+	memcpy(dst->arpeggio_offsets, src->arpeggio_offsets, sizeof(dst->arpeggio_offsets));
+	dst->retrig = src->retrig;
+	dst->xm_retrig = src->xm_retrig;
+	dst->retrig_tick = src->retrig_tick;
+
+	dst->tremor_time = src->tremor_time;
+
+	dst->vibrato_waveform = src->vibrato_waveform;
+	dst->tremolo_waveform = src->tremolo_waveform;
+	dst->panbrello_waveform = src->panbrello_waveform;
+
+	dst->portamento = src->portamento;
+	dst->toneporta = src->toneporta;
+	dst->toneslide = src->toneslide;
+	dst->toneslide_tick = src->toneslide_tick;
+	dst->last_toneslide_tick = src->last_toneslide_tick;
+	dst->ptm_toneslide = src->ptm_toneslide;
+	dst->ptm_last_toneslide = src->ptm_last_toneslide;
+	dst->okt_toneslide = src->okt_toneslide;
+	dst->destnote = src->destnote;
+
+	dst->glissando = src->glissando;
+
+	dst->sample = src->sample;
+	dst->truenote = src->truenote;
+
+	dst->midi_state = src->midi_state;
+	/* Remembered effect parameters (the last* fields) and pattern-loop bookkeeping. */
+	dst->lastvolslide = src->lastvolslide;
+	dst->lastDKL = src->lastDKL;
+	dst->lastEF = src->lastEF;
+	dst->lastG = src->lastG;
+	dst->lastHspeed = src->lastHspeed;
+	dst->lastHdepth = src->lastHdepth;
+	dst->lastRspeed = src->lastRspeed;
+	dst->lastRdepth = src->lastRdepth;
+	dst->lastYspeed = src->lastYspeed;
+	dst->lastYdepth = src->lastYdepth;
+	dst->lastI = src->lastI;
+	dst->lastJ = src->lastJ;
+	dst->lastN = src->lastN;
+	dst->lastO = src->lastO;
+	dst->high_offset = src->high_offset;
+	dst->lastP = src->lastP;
+	dst->lastQ = src->lastQ;
+	dst->lastS = src->lastS;
+	dst->pat_loop_row = src->pat_loop_row;
+	dst->pat_loop_count = src->pat_loop_count;
+	dst->pat_loop_end_row = src->pat_loop_end_row;
+	dst->lastW = src->lastW;
+
+	dst->xm_lastE1 = src->xm_lastE1;
+	dst->xm_lastE2 = src->xm_lastE2;
+	dst->xm_lastEA = src->xm_lastEA;
+	dst->xm_lastEB = src->xm_lastEB;
+	dst->xm_lastX1 = src->xm_lastX1;
+	dst->xm_lastX2 = src->xm_lastX2;
+
+	dst->inv_loop_delay = src->inv_loop_delay;
+	dst->inv_loop_speed = src->inv_loop_speed;
+	dst->inv_loop_offset = src->inv_loop_offset;
+	/* dup_playing() yields NULL for a NULL source and also when its allocations fail. */
+	dst->playing = dup_playing(src->playing, dst, src);
+
+#ifdef BIT_ARRAY_BULLSHIT
+	dst->played_patjump = bit_array_dup(src->played_patjump);
+	dst->played_patjump_order = src->played_patjump_order;
+#endif
+
+	//dst->output = src->output;
+}
+
+
+
+/* Allocate the new callbacks first, then pass them to this function!
+ * It will free them on failure.
+ */
+static DUMB_IT_SIGRENDERER *dup_sigrenderer(DUMB_IT_SIGRENDERER *src, int n_channels, IT_CALLBACKS *callbacks)
+{
+	DUMB_IT_SIGRENDERER *clone = NULL;
+	int idx;
+
+	/* Only allocate when there is a source to copy from. */
+	if (src != NULL)
+		clone = malloc(sizeof(DUMB_IT_SIGRENDERER));
+
+	/* No copy was made (no source, or out of memory), so nothing will own the */
+	/* callbacks: release them here, as the contract above promises. */
+	if (clone == NULL) {
+		free(callbacks); /* free(NULL) is a no-op, so no guard is needed */
+		return NULL;
+	}
+
+	clone->free_playing = NULL;
+	clone->sigdata = src->sigdata;
+
+	clone->n_channels = n_channels;
+
+	clone->resampling_quality = src->resampling_quality;
+
+	clone->globalvolume = src->globalvolume;
+	clone->globalvolslide = src->globalvolslide;
+
+	clone->tempo = src->tempo;
+	clone->temposlide = src->temposlide;
+	/* Channels first: each call also duplicates that channel's own playing entry. */
+	for (idx = 0; idx < DUMB_IT_N_CHANNELS; idx++)
+		dup_channel(clone->channel + idx, src->channel + idx);
+	/* Then the NNA playing slots, re-pointed at the copy's channel array. */
+	for (idx = 0; idx < DUMB_IT_N_NNA_CHANNELS; idx++)
+		clone->playing[idx] = dup_playing(src->playing[idx], clone->channel, src->channel);
+
+	clone->tick = src->tick;
+	clone->speed = src->speed;
+	clone->rowcount = src->rowcount;
+
+	clone->order = src->order;
+	clone->row = src->row;
+	clone->processorder = src->processorder;
+	clone->processrow = src->processrow;
+	clone->breakrow = src->breakrow;
+
+	clone->restart_position = src->restart_position;
+
+	clone->n_rows = src->n_rows;
+
+	clone->entry_start = src->entry_start;
+	clone->entry = src->entry;
+	clone->entry_end = src->entry_end;
+
+	clone->time_left = src->time_left;
+	clone->sub_time_left = src->sub_time_left;
+
+	clone->ramp_style = src->ramp_style;
+	/* The click remover is not duplicated; the copy starts without one. */
+	clone->click_remover = NULL;
+
+	clone->callbacks = callbacks;
+
+#ifdef BIT_ARRAY_BULLSHIT
+	clone->played = bit_array_dup(src->played);
+#endif
+
+	clone->gvz_time = src->gvz_time;
+	clone->gvz_sub_time = src->gvz_sub_time;
+
+	//clone->max_output = src->max_output;
+
+	return clone;
+}
+
+
+
+static const IT_MIDI default_midi = {
+	/* unsigned char SFmacro[16][16]; */
+	{
+		{0xF0, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
+	},
+	/* unsigned char SFmacrolen[16]; */
+	{4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+	/* unsigned short SFmacroz[16]; */
+	/* Bitfield; bit 0 set = z in first position */
+	{
+		0x0008, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+	},
+	/* unsigned char Zmacro[128][16]; */
+	{
+		{0xF0, 0xF0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x58, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0xF0, 0xF0, 0x01, 0x78, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
+	},
+	/* unsigned char Zmacrolen[128]; */
+	{
+		4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	}
+};
+
+
+
+static void it_reset_filter_state(IT_FILTER_STATE *state)
+{
+	state->currsample = 0;
+	state->prevsample = 0;
+}
+
+
+
+#define LOG10 2.30258509299
+/* LOG10 above is ln(10); it is #undef'd again once the filter functions are done. */
+/* IMPORTANT: This function expects one extra sample in 'src' so it can apply
+ * click removal. It reads size samples, starting from src[0], and writes its
+ * output starting at dst[pos]. The pos parameter is required for getting
+ * click removal right.
+ */
+/* Each output is a*input + b*previous output + c*the output before that; outputs are ADDED into dst. */
+static void it_filter_int(DUMB_CLICK_REMOVER *cr, IT_FILTER_STATE *state, sample_t *dst, int32 pos, sample_t *src, int32 size, int step, int sampfreq, int cutoff, int resonance)
+{
+	sample_t currsample = state->currsample;
+	sample_t prevsample = state->prevsample;
+	/* Filter coefficients, computed as floats below and then converted to fixed point. */
+	float a, b, c;
+
+	int32 datasize;
+
+	{
+		float inv_angle = (float)(sampfreq * pow(0.5, 0.25 + cutoff*(1.0/(24<<IT_ENVELOPE_SHIFT))) * (1.0/(2*3.14159265358979323846*110.0)));
+		float loss = (float)exp(resonance*(-LOG10*1.2/128.0));
+		float d, e;
+#if 0
+		loss *= 2; // This is the mistake most players seem to make!
+#endif
+
+#if 1
+		d = (1.0f - loss) / inv_angle;
+		if (d > 2.0f) d = 2.0f;
+		d = (loss - d) * inv_angle;
+		e = inv_angle * inv_angle;
+		a = 1.0f / (1.0f + d + e);
+		c = -e * a;
+		b = 1.0f - a - c;
+#else
+		a = 1.0f / (inv_angle*inv_angle + inv_angle*loss + loss);
+		c = -(inv_angle*inv_angle) * a;
+		b = 1.0f - a - c;
+#endif
+	}
+	/* Offset dst to the output position; datasize is the exclusive bound for the stepped index. */
+	dst += pos * step;
+	datasize = size * step;
+
+#define INT_FILTERS
+#ifdef INT_FILTERS
+#define SCALEB 12
+	{
+		int ai = (int)(a * (1 << (16+SCALEB)));
+		int bi = (int)(b * (1 << (16+SCALEB)));
+		int ci = (int)(c * (1 << (16+SCALEB)));
+		int i;
+		/* With a click remover, record what the first output sample will be as a click at pos. */
+		if (cr) {
+			sample_t startstep = MULSCA(src[0], ai) + MULSCA(currsample, bi) + MULSCA(prevsample, ci);
+			dumb_record_click(cr, pos, startstep);
+		}
+		/* Main loop: i advances by step; each new output is accumulated into dst[i]. */
+		for (i = 0; i < datasize; i += step) {
+			{
+				sample_t newsample = MULSCA(src[i], ai) + MULSCA(currsample, bi) + MULSCA(prevsample, ci);
+				prevsample = currsample;
+				currsample = newsample;
+			}
+			dst[i] += currsample;
+		}
+		/* Record the would-be next output, negated, at pos + size; this reads the extra sample src[datasize]. */
+		if (cr) {
+			sample_t endstep = MULSCA(src[datasize], ai) + MULSCA(currsample, bi) + MULSCA(prevsample, ci);
+			dumb_record_click(cr, pos + size, -endstep);
+		}
+	}
+#else
+#error This version is broken - it does not use step, and state should contain floats for it
+	if (cr) {
+		float startstep = src[0]*a + currsample*b + prevsample*c;
+		dumb_record_click(cr, pos, (sample_t)startstep);
+	}
+
+	{
+		int i = size % 3;
+		while (i > 0) {
+			{
+				float newsample = *src++*a + currsample*b + prevsample*c;
+				prevsample = currsample;
+				currsample = newsample;
+			}
+			*dst++ += (sample_t)currsample;
+			i--;
+		}
+		i = size / 3;
+		while (i > 0) {
+			float newsample;
+			/* Gotta love unrolled loops! */
+			*dst++ += (sample_t)(newsample = *src++*a + currsample*b + prevsample*c);
+			*dst++ += (sample_t)(prevsample = *src++*a + newsample*b + currsample*c);
+			*dst++ += (sample_t)(currsample = *src++*a + prevsample*b + newsample*c);
+			i--;
+		}
+	}
+
+	if (cr) {
+		float endstep = src[datasize]*a + currsample*b + prevsample*c;
+		dumb_record_click(cr, pos + size, -(sample_t)endstep);
+	}
+#endif
+	/* Save the last two outputs so the next call continues from them. */
+	state->currsample = currsample;
+	state->prevsample = prevsample;
+}
+
+#if defined(_USE_SSE) && (defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__amd64__))
+#include <xmmintrin.h>
+/* SSE counterpart of it_filter_int(); it_filter() selects it when _dumb_it_use_sse is nonzero. */
+static void it_filter_sse(DUMB_CLICK_REMOVER *cr, IT_FILTER_STATE *state, sample_t *dst, long pos, sample_t *src, long size, int step, int sampfreq, int cutoff, int resonance)
+{
+	__m128 data, impulse;
+	__m128 temp1, temp2;
+
+	sample_t currsample = state->currsample;
+	sample_t prevsample = state->prevsample;
+
+	float imp[4];
+	/* imp[] holds the coefficients {a, b, c, 0}, applied to {input, currsample, prevsample, unused}. */
+	//profiler( filter_sse ); On ClawHammer Athlon64 3200+, ~12000 cycles, ~500 for that x87 setup code (as opposed to ~25500 for the original integer code)
+
+	long datasize;
+
+	{
+		float inv_angle = (float)(sampfreq * pow(0.5, 0.25 + cutoff*(1.0/(24<<IT_ENVELOPE_SHIFT))) * (1.0/(2*3.14159265358979323846*110.0)));
+		float loss = (float)exp(resonance*(-LOG10*1.2/128.0));
+		float d, e;
+#if 0
+		loss *= 2; // This is the mistake most players seem to make!
+#endif
+
+#if 1
+		d = (1.0f - loss) / inv_angle;
+		if (d > 2.0f) d = 2.0f;
+		d = (loss - d) * inv_angle;
+		e = inv_angle * inv_angle;
+		imp[0] = 1.0f / (1.0f + d + e);
+		imp[2] = -e * imp[0];
+		imp[1] = 1.0f - imp[0] - imp[2];
+#else
+		imp[0] = 1.0f / (inv_angle*inv_angle + inv_angle*loss + loss);
+		imp[2] = -(inv_angle*inv_angle) * imp[0];
+		imp[1] = 1.0f - imp[0] - imp[2];
+#endif
+		imp[3] = 0.0f;
+	}
+
+	dst += pos * step;
+	datasize = size * step;
+
+	{
+		int ai, bi, ci, i;
+		/* ai/bi/ci are only set, and only read, when a click remover is supplied. */
+		if (cr) {
+			sample_t startstep;
+			ai = (int)(imp[0] * (1 << (16+SCALEB)));
+			bi = (int)(imp[1] * (1 << (16+SCALEB)));
+			ci = (int)(imp[2] * (1 << (16+SCALEB)));
+			startstep = MULSCA(src[0], ai) + MULSCA(currsample, bi) + MULSCA(prevsample, ci);
+			dumb_record_click(cr, pos, startstep);
+		}
+		/* Pack the history as data = {0, currsample, prevsample, 0}; lane 0 receives each input. */
+		temp1 = _mm_setzero_ps();
+		data = _mm_cvtsi32_ss( temp1, currsample );
+		temp2 = _mm_cvtsi32_ss( temp1, prevsample );
+		impulse = _mm_loadu_ps( (const float *) &imp );
+		data = _mm_shuffle_ps( data, temp2, _MM_SHUFFLE(1, 0, 0, 1) );
+		/* Per sample: multiply the lanes by imp[], sum them into lane 0, then shift the history up. */
+		for (i = 0; i < datasize; i += step) {
+			temp1 = _mm_cvtsi32_ss( data, src [i] );
+			temp1 = _mm_mul_ps( temp1, impulse );
+			temp2 = _mm_movehl_ps( temp2, temp1 );
+			temp1 = _mm_add_ps( temp1, temp2 );
+			temp2 = temp1;
+			temp2 = _mm_shuffle_ps( temp2, temp1, _MM_SHUFFLE(0, 0, 0, 1) );
+			temp1 = _mm_add_ps( temp1, temp2 );
+			temp1 = _mm_shuffle_ps( temp1, data, _MM_SHUFFLE(2, 1, 0, 0) );
+			data = temp1;
+			dst [i] += _mm_cvtss_si32( temp1 );
+		}
+		if (datasize > 0) { /* the loop ran: pull the last two outputs back out of the vector */
+			currsample = _mm_cvtss_si32( temp1 );
+			temp1 = _mm_shuffle_ps( temp1, data, _MM_SHUFFLE(0, 0, 0, 2) );
+			prevsample = _mm_cvtss_si32( temp1 );
+		} /* otherwise temp1 is still all zero, so keep the incoming state, as it_filter_int() does */
+		if (cr) {
+			sample_t endstep = MULSCA(src[datasize], ai) + MULSCA(currsample, bi) + MULSCA(prevsample, ci);
+			dumb_record_click(cr, pos + size, -endstep);
+		}
+	}
+
+	state->currsample = currsample;
+	state->prevsample = prevsample;
+}
+#endif
+
+#undef LOG10
+
+#ifdef _USE_SSE
+#if defined(_M_IX86) || defined(__i386__)
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#elif defined(__clang__) || defined(__GNUC__)
+/* GCC/Clang stand-in for the two-argument __cpuid() that the MSVC branch
+ * above obtains from <intrin.h>: executes CPUID with EAX = selector and
+ * stores the resulting EAX, EBX, ECX and EDX in data[0], data[1], data[2]
+ * and data[3].
+ *
+ * The statements are spelled __asm__ rather than asm because the plain
+ * keyword is a GNU extension that is unavailable under -std=c99/-std=c11.
+ */
+static inline void
+__cpuid(int *data, int selector)
+{
+#if defined(__PIC__) && defined(__i386__)
+    /* EBX is exchanged with ESI around the instruction so that EBX holds its
+     * original value again afterwards; CPUID's EBX result is read from ESI. */
+    __asm__("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi"
+        : "=a" (data[0]),
+        "=S" (data[1]),
+        "=c" (data[2]),
+        "=d" (data[3])
+        : "0" (selector));
+#elif defined(__PIC__) && defined(__amd64__)
+    /* Same exchange trick, using RBX and a compiler-chosen register (%q1). */
+    __asm__("xchg{q} {%%}rbx, %q1; cpuid; xchg{q} {%%}rbx, %q1"
+        : "=a" (data[0]),
+        "=&r" (data[1]),
+        "=c" (data[2]),
+        "=d" (data[3])
+        : "0" (selector));
+#else
+    /* No register juggling needed: read all four outputs directly. */
+    __asm__("cpuid"
+        : "=a" (data[0]),
+        "=b" (data[1]),
+        "=c" (data[2]),
+        "=d" (data[3])
+        : "a"(selector));
+#endif
+}
+#else
+#define __cpuid(a,b) memset((a), 0, sizeof(int) * 4)
+#endif
+
+/* Returns 1 when CPUID leaf 1 reports bit 25 of EDX set, 0 otherwise.
+ * With the memset() fallback definition of __cpuid above, the buffer is
+ * all zeroes and the result is therefore 0.
+ */
+static int query_cpu_feature_sse(void) {
+	int regs[4];
+
+	__cpuid(regs, 1);
+	/* regs[3] receives EDX. */
+	return (regs[3] & (1 << 25)) != 0;
+}
+
+static int _dumb_it_use_sse = 0;
+
+/* Probes the CPU on the first call and caches the answer in
+ * _dumb_it_use_sse; every later call returns immediately.
+ *
+ * NOTE(review): the `initialized` latch is a plain static int with no
+ * synchronisation - confirm that callers cannot race on the first call.
+ */
+void _dumb_init_sse(void)
+{
+    static int initialized = 0;
+
+    if (initialized)
+        return;
+
+    _dumb_it_use_sse = query_cpu_feature_sse();
+    initialized = 1;
+}
+
+#elif defined(_M_X64) || defined(__amd64__)
+
+/* x86-64 builds: the SSE path is selected unconditionally, so the flag is a
+ * compile-time 1 and initialisation has nothing to do. */
+static const int _dumb_it_use_sse = 1;
+
+void _dumb_init_sse(void) { }
+
+#else
+
+/* Neither 32-bit nor 64-bit x86: the SSE path is never selected, so the flag
+ * is a compile-time 0 and initialisation has nothing to do. */
+static const int _dumb_it_use_sse = 0;
+
+void _dumb_init_sse(void) { }
+
+#endif
+#endif
+
+/* Filter entry point. Forwards every argument unchanged to one of two
+ * implementations: it_filter_sse() when SSE support was compiled in
+ * (_USE_SSE on an x86 or x64 target) and _dumb_it_use_sse is non-zero,
+ * it_filter_int() in every other case.
+ */
+static void it_filter(DUMB_CLICK_REMOVER *cr, IT_FILTER_STATE *state, sample_t *dst, int32 pos, sample_t *src, int32 size, int step, int sampfreq, int cutoff, int resonance)
+{
+#if defined(_USE_SSE) && (defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__amd64__))
+	/* Make sure the run-time flag has been set up before it is consulted. */
+	_dumb_init_sse();
+	if (_dumb_it_use_sse) {
+		it_filter_sse(cr, state, dst, pos, src, size, step, sampfreq, cutoff, resonance);
+		return;
+	}
+#endif
+	it_filter_int(cr, state, dst, pos, src, size, step, sampfreq, cutoff, resonance);
+}
+
+
+
+/* 256-entry sine-shaped waveform table: rises from 0 to a peak of 64, returns
+ * through 0 at index 128, dips to -64 and comes back to -2 at index 255.
+ */
+static const signed char it_sine[256] = {
+	  0,  2,  3,  5,  6,  8,  9, 11, 12, 14, 16, 17, 19, 20, 22, 23,
+	 24, 26, 27, 29, 30, 32, 33, 34, 36, 37, 38, 39, 41, 42, 43, 44,
+	 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59,
+	 59, 60, 60, 61, 61, 62, 62, 62, 63, 63, 63, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 63, 63, 63, 62, 62, 62, 61, 61, 60, 60,
+	 59, 59, 58, 57, 56, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46,
+	 45, 44, 43, 42, 41, 39, 38, 37, 36, 34, 33, 32, 30, 29, 27, 26,
+	 24, 23, 22, 20, 19, 17, 16, 14, 12, 11,  9,  8,  6,  5,  3,  2,
+	  0, -2, -3, -5, -6, -8, -9,-11,-12,-14,-16,-17,-19,-20,-22,-23,
+	-24,-26,-27,-29,-30,-32,-33,-34,-36,-37,-38,-39,-41,-42,-43,-44,
+	-45,-46,-47,-48,-49,-50,-51,-52,-53,-54,-55,-56,-56,-57,-58,-59,
+	-59,-60,-60,-61,-61,-62,-62,-62,-63,-63,-63,-64,-64,-64,-64,-64,
+	-64,-64,-64,-64,-64,-64,-63,-63,-63,-62,-62,-62,-61,-61,-60,-60,
+	-59,-59,-58,-57,-56,-56,-55,-54,-53,-52,-51,-50,-49,-48,-47,-46,
+	-45,-44,-43,-42,-41,-39,-38,-37,-36,-34,-33,-32,-30,-29,-27,-26,
+	-24,-23,-22,-20,-19,-17,-16,-14,-12,-11, -9, -8, -6, -5, -3, -2
+};
+
+
+
+#if 1
+/** WARNING: use these! */
+/** JULIEN: Plus, for XM compatibility, it could be interesting to rename
+ * it_sawtooth[] to it_rampdown[], and add an it_rampup[].
+ * Also, still for XM compatibility, it would be good if it were possible to
+ * tell the player not to retrigger the waveform on a new instrument.
+ * Both of these are only for completeness though, as I don't think it would
+ * be very noticeable ;)
+ */
+/** ENTHEH: IT also has the 'don't retrig' thingy :) */
+/* 256-entry descending ramp: starts at 64 (index 0) and falls steadily,
+ * crossing 0 in the middle, to -64 at index 255.
+ */
+static const signed char it_sawtooth[256] = {
+	 64, 63, 63, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58, 57, 57, 56,
+	 56, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51, 50, 50, 49, 49, 48,
+	 48, 47, 47, 46, 46, 45, 45, 44, 44, 43, 43, 42, 42, 41, 41, 40,
+	 40, 39, 39, 38, 38, 37, 37, 36, 36, 35, 35, 34, 34, 33, 33, 32,
+	 32, 31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24,
+	 24, 23, 23, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 16,
+	 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10,  9,  9,  8,
+	  8,  7,  7,  6,  6,  5,  5,  4,  4,  3,  3,  2,  2,  1,  1,  0,
+	  0, -1, -1, -2, -2, -3, -3, -4, -4, -5, -5, -6, -6, -7, -7, -8,
+	 -8, -9, -9,-10,-10,-11,-11,-12,-12,-13,-13,-14,-14,-15,-15,-16,
+	-16,-17,-17,-18,-18,-19,-19,-20,-20,-21,-21,-22,-22,-23,-23,-24,
+	-24,-25,-25,-26,-26,-27,-27,-28,-28,-29,-29,-30,-30,-31,-31,-32,
+	-32,-33,-33,-34,-34,-35,-35,-36,-36,-37,-37,-38,-38,-39,-39,-40,
+	-40,-41,-41,-42,-42,-43,-43,-44,-44,-45,-45,-46,-46,-47,-47,-48,
+	-48,-49,-49,-50,-50,-51,-51,-52,-52,-53,-53,-54,-54,-55,-55,-56,
+	-56,-57,-57,-58,-58,-59,-59,-60,-60,-61,-61,-62,-62,-63,-63,-64
+};
+
+/* 256-entry square wave: 64 for the first 128 entries, 0 for the remaining
+ * 128 (compare it_xm_squarewave, whose second half is -64).
+ */
+static const signed char it_squarewave[256] = {
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
+};
+
+/* 256-entry ramp table for XM: the first half falls from 0 to -64, then the
+ * value jumps to 64 at index 128 and the second half falls from 64 to 0.
+ */
+static const signed char it_xm_ramp[256] = {
+	  0, -1, -1, -2, -2, -3, -3, -4, -4, -5, -5, -6, -6, -7, -7, -8,
+	 -8, -9, -9,-10,-10,-11,-11,-12,-12,-13,-13,-14,-14,-15,-15,-16,
+	-16,-17,-17,-18,-18,-19,-19,-20,-20,-21,-21,-22,-22,-23,-23,-24,
+	-24,-25,-25,-26,-26,-27,-27,-28,-28,-29,-29,-30,-30,-31,-31,-32,
+	-32,-33,-33,-34,-34,-35,-35,-36,-36,-37,-37,-38,-38,-39,-39,-40,
+	-40,-41,-41,-42,-42,-43,-43,-44,-44,-45,-45,-46,-46,-47,-47,-48,
+	-48,-49,-49,-50,-50,-51,-51,-52,-52,-53,-53,-54,-54,-55,-55,-56,
+	-56,-57,-57,-58,-58,-59,-59,-60,-60,-61,-61,-62,-62,-63,-63,-64,
+	 64, 63, 63, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58, 57, 57, 56,
+	 56, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51, 50, 50, 49, 49, 48,
+	 48, 47, 47, 46, 46, 45, 45, 44, 44, 43, 43, 42, 42, 41, 41, 40,
+	 40, 39, 39, 38, 38, 37, 37, 36, 36, 35, 35, 34, 34, 33, 33, 32,
+	 32, 31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24,
+	 24, 23, 23, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 16,
+	 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10,  9,  9,  8,
+	  8,  7,  7,  6,  6,  5,  5,  4,  4,  3,  3,  2,  2,  1,  1,  0
+};
+
+/* 256-entry bipolar square wave for XM: 64 for the first 128 entries, -64 for
+ * the remaining 128.
+ */
+static const signed char it_xm_squarewave[256] = {
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+	-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,
+	-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,
+	-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,
+	-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,
+	-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,
+	-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,
+	-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,
+	-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64,-64
+};
+
+#endif
+
+
+
+/* Zeroes the key-off, note-cut and note-delay counters of each of the
+ * sigrenderer's DUMB_IT_N_CHANNELS channels.
+ */
+static void reset_tick_counts(DUMB_IT_SIGRENDERER *sigrenderer)
+{
+	IT_CHANNEL *ch = &sigrenderer->channel[0];
+	int remaining = DUMB_IT_N_CHANNELS;
+
+	while (remaining-- > 0) {
+		ch->note_delay_count = 0;
+		ch->note_cut_count = 0;
+		ch->key_off_count = 0;
+		ch++;
+	}
+}
+
+
+
+/* 32-entry lookup tables whose values are always 0, 1 or 2. arpeggio_mod is
+ * the table reset_channel_effects() installs as a channel's default
+ * arpeggio_table. Presumably each entry selects one of the channel's
+ * arpeggio_offsets for a given tick - confirm at the use site.
+ */
+static const unsigned char arpeggio_mod[32] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1};
+static const unsigned char arpeggio_xm[32] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
+static const unsigned char arpeggio_okt_3[32] = {1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0};
+static const unsigned char arpeggio_okt_4[32] = {0, 2, 0, 1, 0, 2, 0, 1, 0, 2, 0, 1, 0, 2, 0, 1, 0, 2, 0, 1, 0, 2, 0, 1, 0, 2, 0, 1, 0, 2, 0, 1};
+static const unsigned char arpeggio_okt_5[32] = {2, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2};
+
+
+
+/* Puts a channel's effect state back to idle: slide amounts, retrigger,
+ * portamento and tone-slide settings are zeroed, the arpeggio table is
+ * pointed back at arpeggio_mod and, when a voice is attached, its vibrato,
+ * tremolo and panbrello rates are cleared.
+ */
+static void reset_channel_effects(IT_CHANNEL *channel)
+{
+	IT_PLAYING *voice = channel->playing;
+
+	/* Slides. */
+	channel->volslide = 0;
+	channel->xm_volslide = 0;
+	channel->panslide = 0;
+	channel->channelvolslide = 0;
+
+	/* Arpeggio. */
+	channel->arpeggio_table = (const unsigned char *) &arpeggio_mod;
+	memset(channel->arpeggio_offsets, 0, sizeof(channel->arpeggio_offsets));
+
+	/* Retrigger: the tick counter is rewound only if an XM retrigger was set. */
+	channel->retrig = 0;
+	if (channel->xm_retrig) {
+		channel->retrig_tick = 0;
+		channel->xm_retrig = 0;
+	}
+
+	/* Bit 7 of tremor_time is dropped; the low seven bits survive. */
+	channel->tremor_time &= 127;
+
+	channel->portamento = 0;
+	channel->toneporta = 0;
+
+	/* Remember the outgoing PTM tone slide (zero when there was none) and,
+	 * if one was active, the tick it had reached, before clearing both. */
+	channel->ptm_last_toneslide = channel->ptm_toneslide;
+	if (channel->ptm_toneslide)
+		channel->last_toneslide_tick = channel->toneslide_tick;
+	channel->ptm_toneslide = 0;
+	channel->toneslide_tick = 0;
+	channel->okt_toneslide = 0;
+
+	if (voice) {
+		voice->vibrato_n = 0;
+		voice->tremolo_speed = 0;
+		voice->tremolo_depth = 0;
+		voice->panbrello_speed = 0;
+	}
+}
+
+/* Clears the sigrenderer-wide global-volume and tempo slides, then resets the
+ * effect state of every channel via reset_channel_effects().
+ */
+static void reset_effects(DUMB_IT_SIGRENDERER *sigrenderer)
+{
+	int n = 0;
+
+	sigrenderer->temposlide = 0;
+	sigrenderer->globalvolslide = 0;
+
+	while (n < DUMB_IT_N_CHANNELS) {
+		reset_channel_effects(&sigrenderer->channel[n]);
+		n++;
+	}
+}
+
+
+
+/* Advances the tremor counter packed into channel->tremor_time. Nothing
+ * happens unless bit 7 is set and the channel has a voice. When the counter
+ * reads exactly 128 it is reloaded with the high nibble of lastI plus bits
+ * 7 and 6 (192); when it reads exactly 192 it is reloaded with the low
+ * nibble of lastI plus bit 7 (128); otherwise it is decremented by one.
+ */
+static void update_tremor(IT_CHANNEL *channel)
+{
+	if (!(channel->tremor_time & 128) || !channel->playing)
+		return;
+
+	switch (channel->tremor_time) {
+		case 128:
+			channel->tremor_time = (channel->lastI >> 4) | 192;
+			break;
+		case 192:
+			channel->tremor_time = (channel->lastI & 15) | 128;
+			break;
+		default:
+			channel->tremor_time--;
+			break;
+	}
+}
+
+
+
+/* Resampler pickup callback for a forward loop: moves the position back by
+ * one loop length (end - start) and adds the same amount to the time_lost of
+ * the IT_PLAYING passed in through data.
+ */
+static void it_pickup_loop(DUMB_RESAMPLER *resampler, void *data)
+{
+	IT_PLAYING *playing = (IT_PLAYING *)data;
+
+	playing->time_lost += resampler->end - resampler->start;
+	resampler->pos -= resampler->end - resampler->start;
+}
+
+
+
+/* Resampler pickup callback for a ping-pong loop: reflects the position about
+ * whichever loop edge was hit and flips the direction. Only the turnaround
+ * at the start edge (i.e. while running backwards) adds to the time_lost of
+ * the IT_PLAYING passed in through data, by twice the loop length.
+ */
+static void it_pickup_pingpong_loop(DUMB_RESAMPLER *resampler, void *data)
+{
+	const int was_backwards = resampler->dir < 0;
+
+	/* Both turnarounds mirror the sub-sample position. */
+	resampler->subpos ^= 65535;
+
+	if (!was_backwards) {
+		/* Hit the end edge: bounce and head backwards. */
+		resampler->pos = (resampler->end << 1) - 1 - resampler->pos;
+		resampler->dir = -1;
+		return;
+	}
+
+	/* Hit the start edge: bounce and head forwards again. */
+	resampler->pos = (resampler->start << 1) - 1 - resampler->pos;
+	resampler->dir = 1;
+	((IT_PLAYING *)data)->time_lost += (resampler->end - resampler->start) << 1;
+}
+
+
+
+/* Resampler pickup callback for a non-looping region: a resampler that is not
+ * running backwards is stopped (dir = 0); one that is running backwards is
+ * reflected about the start edge and sent forwards. data is unused.
+ */
+static void it_pickup_stop_at_end(DUMB_RESAMPLER *resampler, void *data)
+{
+	(void)data;
+
+	if (resampler->dir >= 0) {
+		resampler->dir = 0;
+		return;
+	}
+
+	/* By rights time_lost would grow by twice the region length here, but
+	 * there is no need: it will not be used from this point on. */
+	resampler->subpos ^= 65535;
+	resampler->pos = (resampler->start << 1) - 1 - resampler->pos;
+	resampler->dir = 1;
+}
+
+
+
+/* Resampler pickup callback installed for reversed, non-looping playback:
+ * unconditionally stops the resampler by setting dir to 0.
+ */
+static void it_pickup_stop_after_reverse(DUMB_RESAMPLER *resampler, void *data)
+{
+	resampler->dir = 0;
+
+	(void)data; /* unused */
+}
+
+
+
+/* Chooses the resampler's start/end bounds and pickup callback for a voice,
+ * in this order of precedence:
+ *   1. sustain loop, while IT_PLAYING_SUSTAINOFF is not set;
+ *   2. regular loop;
+ *   3. reversed playback over the whole sample (also sets dir = -1);
+ *   4. plain playback to the end of the sample.
+ * A loop whose start equals its end is treated as "stop at end".
+ */
+static void it_playing_update_resamplers(IT_PLAYING *playing)
+{
+	const IT_SAMPLE *smp = playing->sample;
+	DUMB_RESAMPLER *rs = &playing->resampler;
+	int looped = 0;
+	int pingpong = 0;
+
+	if ((smp->flags & IT_SAMPLE_SUS_LOOP) && !(playing->flags & IT_PLAYING_SUSTAINOFF)) {
+		rs->start = smp->sus_loop_start;
+		rs->end = smp->sus_loop_end;
+		pingpong = (smp->flags & IT_SAMPLE_PINGPONG_SUS_LOOP) != 0;
+		looped = 1;
+	} else if (smp->flags & IT_SAMPLE_LOOP) {
+		rs->start = smp->loop_start;
+		rs->end = smp->loop_end;
+		pingpong = (smp->flags & IT_SAMPLE_PINGPONG_LOOP) != 0;
+		looped = 1;
+	} else if (playing->flags & IT_PLAYING_REVERSE) {
+		rs->start = 0;
+		rs->end = smp->length;
+		rs->dir = -1;
+		rs->pickup = &it_pickup_stop_after_reverse;
+	} else {
+		/* Sustain has been released: keep the sustain loop's start as the
+		 * lower bound if the sample has one. */
+		if (smp->flags & IT_SAMPLE_SUS_LOOP)
+			rs->start = smp->sus_loop_start;
+		else
+			rs->start = 0;
+		rs->end = smp->length;
+		rs->pickup = &it_pickup_stop_at_end;
+	}
+
+	if (looped) {
+		if (rs->start == rs->end)
+			rs->pickup = &it_pickup_stop_at_end;
+		else if (pingpong)
+			rs->pickup = &it_pickup_pingpong_loop;
+		else
+			rs->pickup = &it_pickup_loop;
+	}
+
+	ASSERT(playing->resampler.pickup_data == playing);
+}
+
+
+
+/* Restarts playing->resampler at sample position pos; call this whenever the
+ * sample or the sample position changes. The resampling quality is the
+ * voice's own setting, capped by the sample's max_resampling_quality when
+ * that cap is non-negative. Also clears time_lost and the IT_PLAYING_DEAD
+ * flag, then re-derives the loop bounds via it_playing_update_resamplers().
+ */
+static void it_playing_reset_resamplers(IT_PLAYING *playing, int32 pos)
+{
+	const IT_SAMPLE *smp = playing->sample;
+	int quality = playing->resampling_quality;
+	int channels = 1;
+	int bits = 8;
+
+	if (smp->flags & IT_SAMPLE_16BIT)
+		bits = 16;
+	if (smp->flags & IT_SAMPLE_STEREO)
+		channels = 2;
+	if (smp->max_resampling_quality >= 0 && smp->max_resampling_quality < quality)
+		quality = smp->max_resampling_quality;
+
+	dumb_reset_resampler_n(bits, &playing->resampler, smp->data, channels, pos, 0, 0, quality);
+	playing->resampler.pickup_data = playing;
+	playing->flags &= ~IT_PLAYING_DEAD;
+	playing->time_lost = 0;
+	it_playing_update_resamplers(playing);
+}
+
+static void it_retrigger_note(DUMB_IT_SIGRENDERER *sigrenderer, IT_CHANNEL *channel);
+
+/* Should we only be retriggering short samples on XM? */
+
+/* Counts down the channel's retrigger timer and, when it reaches zero or
+ * below, restarts the note.
+ *
+ * Two modes exist:
+ *   - xm_retrig non-zero: plain retrigger, reloading the timer with
+ *     xm_retrig.
+ *   - otherwise, if the low nibble of retrig is non-zero: the 16-wide range
+ *     that retrig falls in (0x00-0x0F, 0x10-0x1F, ...) selects a volume
+ *     adjustment applied before the retrigger, and the timer is reloaded
+ *     with the low nibble.
+ *
+ * In both modes a channel with a voice has its resamplers reset to position
+ * 0 and declick_stage cleared; a channel without one calls
+ * it_retrigger_note() only when the module has the IT_WAS_AN_XM flag.
+ */
+static void update_retrig(DUMB_IT_SIGRENDERER *sigrenderer, IT_CHANNEL *channel)
+{
+	if (channel->xm_retrig) {
+		channel->retrig_tick--;
+		if (channel->retrig_tick <= 0) {
+			if (channel->playing) {
+				it_playing_reset_resamplers(channel->playing, 0);
+				channel->playing->declick_stage = 0;
+			} else if (sigrenderer->sigdata->flags & IT_WAS_AN_XM) it_retrigger_note(sigrenderer, channel);
+			channel->retrig_tick = channel->xm_retrig;
+		}
+	} else if (channel->retrig & 0x0F) {
+		channel->retrig_tick--;
+		if (channel->retrig_tick <= 0) {
+			/* Ranges 0x1x-0x5x subtract 1, 2, 4, 8, 16. The "> 64 -> 0" test
+			 * after each subtraction presumably catches wraparound below zero
+			 * - this assumes channel->volume is an unsigned type; TODO confirm. */
+			if (channel->retrig < 0x10) {
+				/* 0x0x: volume unchanged */
+			} else if (channel->retrig < 0x20) {
+				channel->volume--;
+				if (channel->volume > 64) channel->volume = 0;
+			} else if (channel->retrig < 0x30) {
+				channel->volume -= 2;
+				if (channel->volume > 64) channel->volume = 0;
+			} else if (channel->retrig < 0x40) {
+				channel->volume -= 4;
+				if (channel->volume > 64) channel->volume = 0;
+			} else if (channel->retrig < 0x50) {
+				channel->volume -= 8;
+				if (channel->volume > 64) channel->volume = 0;
+			} else if (channel->retrig < 0x60) {
+				channel->volume -= 16;
+				if (channel->volume > 64) channel->volume = 0;
+			} else if (channel->retrig < 0x70) {
+				/* 0x6x: volume * 2 / 3 */
+				channel->volume <<= 1;
+				channel->volume /= 3;
+			} else if (channel->retrig < 0x80) {
+				/* 0x7x: volume / 2 */
+				channel->volume >>= 1;
+			} else if (channel->retrig < 0x90) {
+				/* 0x8x: volume unchanged */
+			} else if (channel->retrig < 0xA0) {
+				/* 0x9x-0xDx add 1, 2, 4, 8, 16, clamped to 64. */
+				channel->volume++;
+				if (channel->volume > 64) channel->volume = 64;
+			} else if (channel->retrig < 0xB0) {
+				channel->volume += 2;
+				if (channel->volume > 64) channel->volume = 64;
+			} else if (channel->retrig < 0xC0) {
+				channel->volume += 4;
+				if (channel->volume > 64) channel->volume = 64;
+			} else if (channel->retrig < 0xD0) {
+				channel->volume += 8;
+				if (channel->volume > 64) channel->volume = 64;
+			} else if (channel->retrig < 0xE0) {
+				channel->volume += 16;
+				if (channel->volume > 64) channel->volume = 64;
+			} else if (channel->retrig < 0xF0) {
+				/* 0xEx: volume * 3 / 2, clamped to 64 */
+				channel->volume *= 3;
+				channel->volume >>= 1;
+				if (channel->volume > 64) channel->volume = 64;
+			} else {
+				/* 0xFx: volume * 2, clamped to 64 */
+				channel->volume <<= 1;
+				if (channel->volume > 64) channel->volume = 64;
+			}
+			if (channel->playing) {
+				it_playing_reset_resamplers(channel->playing, 0);
+				channel->playing->declick_stage = 0;
+			} else if (sigrenderer->sigdata->flags & IT_WAS_AN_XM) it_retrigger_note(sigrenderer, channel);
+			channel->retrig_tick = channel->retrig & 0x0F;
+		}
+	}
+}
+
+
+/* Advances one voice's vibrato, tremolo and panbrello phase counters by their
+ * respective speeds (vibrato and tremolo speeds are scaled by 4, and vibrato
+ * additionally by vibrato_n). When the panbrello waveform is 3, a new
+ * random value in the range -64..64 is also drawn with rand().
+ */
+static void update_smooth_effects_playing(IT_PLAYING *playing)
+{
+	playing->panbrello_time += playing->panbrello_speed;
+	if (playing->panbrello_waveform == 3) {
+		playing->panbrello_random = (rand() % 129) - 64;
+	}
+
+	playing->tremolo_time += playing->tremolo_speed << 2;
+
+	playing->vibrato_time += playing->vibrato_n *
+		(playing->vibrato_speed << 2);
+}
+
+/* Runs update_smooth_effects_playing() on every live voice: first the one
+ * attached to each of the DUMB_IT_N_CHANNELS channels, then each entry of
+ * the sigrenderer's DUMB_IT_N_NNA_CHANNELS-sized playing[] array.
+ */
+static void update_smooth_effects(DUMB_IT_SIGRENDERER *sigrenderer)
+{
+	int n;
+
+	for (n = 0; n < DUMB_IT_N_CHANNELS; n++) {
+		if (sigrenderer->channel[n].playing)
+			update_smooth_effects_playing(sigrenderer->channel[n].playing);
+	}
+
+	for (n = 0; n < DUMB_IT_N_NNA_CHANNELS; n++) {
+		if (sigrenderer->playing[n])
+			update_smooth_effects_playing(sigrenderer->playing[n]);
+	}
+}
+
+
+/* Increment added to channel->inv_loop_delay by update_invert_loop() on each
+ * call, indexed by channel->inv_loop_speed. The loop advances one byte once
+ * the accumulated delay reaches 0x80, so larger entries advance it sooner.
+ */
+static const unsigned char pt_tab_invloop[16] =
+{
+	0x00, 0x05, 0x06, 0x07, 0x08, 0x0A, 0x0B, 0x0D,
+	0x0F, 0x13, 0x16, 0x1A, 0x20, 0x2B, 0x40, 0x80
+};
+
+/* Invert-loop step. Adds the table entry for the channel's inv_loop_speed to
+ * inv_loop_delay; once the delay reaches 0x80 it is cleared and, provided
+ * the sample exists, has a loop of at least 4 units and is neither stereo
+ * nor 16-bit, the next byte inside the loop (wrapping at the loop length) is
+ * bit-inverted in place in the sample data.
+ */
+static void update_invert_loop(IT_CHANNEL *channel, IT_SAMPLE *sample)
+{
+	channel->inv_loop_delay += pt_tab_invloop[channel->inv_loop_speed];
+	if (channel->inv_loop_delay < 0x80)
+		return;
+
+	channel->inv_loop_delay = 0;
+
+	if (!sample)
+		return;
+	if ((sample->flags & (IT_SAMPLE_EXISTS | IT_SAMPLE_LOOP)) != (IT_SAMPLE_EXISTS | IT_SAMPLE_LOOP))
+		return;
+	if (sample->flags & (IT_SAMPLE_STEREO | IT_SAMPLE_16BIT))
+		return;
+	if (sample->loop_end - sample->loop_start < 4)
+		return;
+
+	channel->inv_loop_offset++;
+	if (channel->inv_loop_offset >= (sample->loop_end - sample->loop_start))
+		channel->inv_loop_offset = 0;
+
+	/* Flip every bit of the byte at the current loop offset. */
+	((char *)sample->data)[sample->loop_start + channel->inv_loop_offset] ^= 0xFF;
+}
+
+
+/* Applies the owning channel's per-voice effects to one voice:
+ *   - while a channel volume slide is active, copies the channel volume into
+ *     the voice;
+ *   - an OKT tone slide (okt_toneslide non-zero) consumes one count and moves
+ *     the note by channel->toneslide;
+ *   - otherwise a PTM tone slide moves the note each time toneslide_tick
+ *     counts down to zero, mirrors the new note into the channel when this
+ *     voice is the channel's current one, and optionally retriggers.
+ * A note that ends up >= 120 is forced to 0 for a negative slide and to 119
+ * otherwise.
+ */
+static void update_playing_effects(IT_PLAYING *playing)
+{
+	IT_CHANNEL *channel = playing->channel;
+
+	if (channel->channelvolslide)
+		playing->channel_volume = channel->channelvolume;
+
+	if (channel->okt_toneslide) {
+		channel->okt_toneslide--;
+		playing->note += channel->toneslide;
+		if (playing->note >= 120)
+			playing->note = (channel->toneslide < 0) ? 0 : 119;
+		return;
+	}
+
+	if (!channel->ptm_toneslide)
+		return;
+
+	/* Only act on the call that brings the tick counter down to zero. */
+	if (--channel->toneslide_tick != 0)
+		return;
+	channel->toneslide_tick = channel->ptm_toneslide;
+
+	playing->note += channel->toneslide;
+	if (playing->note >= 120)
+		playing->note = (channel->toneslide < 0) ? 0 : 119;
+
+	if (channel->playing == playing)
+		channel->note = channel->truenote = playing->note;
+
+	if (channel->toneslide_retrig) {
+		it_playing_reset_resamplers(playing, 0);
+		playing->declick_stage = 0;
+	}
+}
+
+
+/* Applies one step of every running slide-type effect.
+ *
+ * Order of work:
+ *   1. sigrenderer-wide global volume slide and tempo slide;
+ *   2. for each channel: XM volume slide, volume slide, pan slide, channel
+ *      volume slide, tremor, retrigger, invert loop and - if the channel has
+ *      a voice - portamento, tone portamento and update_playing_effects();
+ *   3. update_playing_effects() for each voice in sigrenderer->playing[];
+ *   4. update_smooth_effects() for the phase-based effects.
+ *
+ * NOTE(review): several clamps below test only the upper bound after adding
+ * a possibly negative slide; they appear to rely on the target fields being
+ * unsigned so that going below zero wraps above the limit - confirm against
+ * the struct declarations.
+ */
+static void update_effects(DUMB_IT_SIGRENDERER *sigrenderer)
+{
+    int i;
+
+	/* Global volume: clamp to 128 on the way up, 0 on the way down. */
+	if (sigrenderer->globalvolslide) {
+		sigrenderer->globalvolume += sigrenderer->globalvolslide;
+		if (sigrenderer->globalvolume > 128) {
+			if (sigrenderer->globalvolslide >= 0)
+				sigrenderer->globalvolume = 128;
+			else
+				sigrenderer->globalvolume = 0;
+		}
+	}
+
+	/* Tempo: a result below 32 becomes 255 for a non-negative slide and 32
+	 * for a negative one. */
+	if (sigrenderer->temposlide) {
+		sigrenderer->tempo += sigrenderer->temposlide;
+		if (sigrenderer->tempo < 32) {
+			if (sigrenderer->temposlide >= 0)
+				sigrenderer->tempo = 255;
+			else
+				sigrenderer->tempo = 32;
+		}
+	}
+
+	for (i = 0; i < DUMB_IT_N_CHANNELS; i++) {
+		IT_CHANNEL *channel = &sigrenderer->channel[i];
+		IT_PLAYING *playing = channel->playing;
+
+		if (channel->xm_volslide) {
+			channel->volume += channel->xm_volslide;
+			if (channel->volume > 64) {
+				if (channel->xm_volslide >= 0)
+					channel->volume = 64;
+				else
+					channel->volume = 0;
+			}
+		}
+
+		if (channel->volslide) {
+			/* Upper limit is 63 for modules flagged IT_WAS_AN_S3M, 64 otherwise. */
+			int clip = (sigrenderer->sigdata->flags & IT_WAS_AN_S3M) ? 63 : 64;
+			channel->volume += channel->volslide;
+			if (channel->volume > clip) {
+				if (channel->volslide >= 0)
+					channel->volume = clip;
+				else
+					channel->volume = 0;
+			}
+		}
+
+		if (channel->panslide) {
+			if (sigrenderer->sigdata->flags & IT_WAS_AN_XM) {
+				/* XM: the slide works on truepan directly, in steps of 64 per
+				 * unit, kept within 32 .. 32+255*64. A surround pan is first
+				 * replaced by centre values. */
+				if (IT_IS_SURROUND(channel->pan))
+				{
+					channel->pan = 32;
+					channel->truepan = 32 + 128 * 64;
+				}
+				if (channel->panslide == -128)
+					channel->truepan = 32;
+				else
+					channel->truepan = MID(32, channel->truepan + channel->panslide*64, 32+255*64);
+			} else {
+				/* Other formats: slide pan within 0..64, then derive truepan. */
+				if (IT_IS_SURROUND(channel->pan))
+				{
+					channel->pan = 32;
+				}
+				channel->pan += channel->panslide;
+				if (channel->pan > 64) {
+					if (channel->panslide >= 0)
+						channel->pan = 64;
+					else
+						channel->pan = 0;
+				}
+				channel->truepan = channel->pan << IT_ENVELOPE_SHIFT;
+			}
+		}
+
+		if (channel->channelvolslide) {
+			channel->channelvolume += channel->channelvolslide;
+			if (channel->channelvolume > 64) {
+				if (channel->channelvolslide >= 0)
+					channel->channelvolume = 64;
+				else
+					channel->channelvolume = 0;
+			}
+		}
+
+		update_tremor(channel);
+
+		update_retrig(sigrenderer, channel);
+
+		if (channel->inv_loop_speed) update_invert_loop(channel, playing ? playing->sample : NULL);
+
+		if (playing) {
+			playing->slide += channel->portamento;
+
+			if (sigrenderer->sigdata->flags & IT_LINEAR_SLIDES) {
+				/* Linear slides: pitch is measured in 1/256ths of a note
+				 * relative to note 60; move towards destnote by toneporta
+				 * and stop exactly on the destination. */
+				if (channel->toneporta && channel->destnote < 120) {
+					int currpitch = ((playing->note - 60) << 8) + playing->slide;
+					int destpitch = (channel->destnote - 60) << 8;
+					if (currpitch > destpitch) {
+						currpitch -= channel->toneporta;
+						if (currpitch < destpitch) {
+							currpitch = destpitch;
+							channel->destnote = IT_NOTE_OFF;
+						}
+					} else if (currpitch < destpitch) {
+						currpitch += channel->toneporta;
+						if (currpitch > destpitch) {
+							currpitch = destpitch;
+							channel->destnote = IT_NOTE_OFF;
+						}
+					}
+					playing->slide = currpitch - ((playing->note - 60) << 8);
+				}
+			} else {
+				/* Non-linear slides: compare period-like deltas computed with
+				 * pow(); on overshoot snap the note to destnote and zero the
+				 * slide. */
+				if (channel->toneporta && channel->destnote < 120) {
+					float amiga_multiplier = playing->sample->C5_speed * (1.0f / AMIGA_DIVISOR);
+
+					float deltanote = (float)pow(DUMB_SEMITONE_BASE, 60 - playing->note);
+					/* deltanote is 1.0 for C-5, 0.5 for C-6, etc. */
+
+					float deltaslid = deltanote - playing->slide * amiga_multiplier;
+
+					float destdelta = (float)pow(DUMB_SEMITONE_BASE, 60 - channel->destnote);
+					if (deltaslid < destdelta) {
+						playing->slide -= channel->toneporta;
+						deltaslid = deltanote - playing->slide * amiga_multiplier;
+						if (deltaslid > destdelta) {
+							playing->note = channel->destnote;
+							playing->slide = 0;
+							channel->destnote = IT_NOTE_OFF;
+						}
+					} else {
+						playing->slide += channel->toneporta;
+						deltaslid = deltanote - playing->slide * amiga_multiplier;
+						if (deltaslid < destdelta) {
+							playing->note = channel->destnote;
+							playing->slide = 0;
+							channel->destnote = IT_NOTE_OFF;
+						}
+					}
+				}
+			}
+
+			update_playing_effects(playing);
+		}
+	}
+
+	/* Voices held in the sigrenderer's own playing[] array. */
+	for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+		IT_PLAYING *playing = sigrenderer->playing[i];
+		if (playing) update_playing_effects(playing);
+	}
+
+	update_smooth_effects(sigrenderer);
+}
+
+
+static void it_note_off(IT_PLAYING *playing);
+
+/* Handles the flow-control effects carried by one pattern entry:
+ * IT_JUMP_TO_ORDER and, under IT_S, the pattern-loop and pattern-delay
+ * subcommands. It communicates with the caller by writing processorder,
+ * processrow, breakrow and rowcount on the sigrenderer.
+ *
+ * Returns 1 if a pattern loop is happening (a loop jump was scheduled),
+ * 0 otherwise.
+ *
+ * This function should be renamed; it doesn't do the 'Update Pattern
+ * Variables' operation ittech.txt describes.
+ */
+static int update_pattern_variables(DUMB_IT_SIGRENDERER *sigrenderer, IT_ENTRY *entry)
+{
+	IT_CHANNEL *channel = &sigrenderer->channel[(int)entry->channel];
+
+	if (entry->mask & IT_ENTRY_EFFECT) {
+		switch (entry->effect) {
+			case IT_JUMP_TO_ORDER:
+				/* XXX jump and break in same row */
+				if ( ( ( sigrenderer->processrow | 0xC00 ) == 0xFFFE ) &&
+					! ( sigrenderer->processrow & 0x800 ) ) {
+					sigrenderer->processrow = 0xFFFE & ~0xC00;
+				} else {
+					sigrenderer->breakrow = 0;
+					sigrenderer->processrow = 0xFFFE & ~0x400;
+				}
+				sigrenderer->processorder = entry->effectvalue - 1;
+				break;
+
+			case IT_S:
+				{
+					/* A zero parameter recalls the remembered value: lastDKL
+					 * for modules flagged IT_WAS_AN_S3M (which is also
+					 * updated), lastS otherwise. lastS is always updated. */
+					unsigned char effectvalue = entry->effectvalue;
+					if (sigrenderer->sigdata->flags & IT_WAS_AN_S3M) {
+						if (effectvalue == 0)
+							effectvalue = channel->lastDKL;
+						channel->lastDKL = effectvalue;
+					} else {
+						if (effectvalue == 0)
+							effectvalue = channel->lastS;
+					}
+					channel->lastS = effectvalue;
+					switch (effectvalue >> 4) {
+						case IT_S_PATTERN_LOOP:
+							{
+								/* Low nibble: 0 marks the loop start row, any
+								 * other value is the repeat count. */
+								unsigned char v = effectvalue & 15;
+								if (v == 0) {
+#ifdef BIT_ARRAY_BULLSHIT
+									if (!channel->played_patjump)
+										channel->played_patjump = bit_array_create(256);
+									else {
+										if ( channel->played_patjump_order != 0xFFFE && channel->played_patjump_order != sigrenderer->order )
+											bit_array_merge(sigrenderer->played, channel->played_patjump, channel->played_patjump_order * 256);
+										//if (channel->played_patjump_order != sigrenderer->order)
+											bit_array_reset(channel->played_patjump);
+									}
+									channel->played_patjump_order = sigrenderer->order;
+#endif
+									channel->pat_loop_row = sigrenderer->processrow;
+								} else {
+									if (channel->pat_loop_count == 0) {
+										/* First time this loop end is reached: arm
+										 * the counter and jump back. */
+#ifdef BIT_ARRAY_BULLSHIT
+										/* Uninitialized and no start marker yet... */
+										if (channel->played_patjump_order == 0xFFFE) {
+											int n;
+											bit_array_destroy(channel->played_patjump);
+											channel->played_patjump = bit_array_create(256);
+											for (n = channel->pat_loop_row; n <= sigrenderer->row; n++)
+												bit_array_clear(sigrenderer->played, sigrenderer->order * 256 + n);
+											channel->played_patjump_order = sigrenderer->order;
+										} else if (channel->played_patjump_order == sigrenderer->order) {
+											bit_array_set(channel->played_patjump, sigrenderer->row);
+											bit_array_mask(sigrenderer->played, channel->played_patjump, channel->played_patjump_order * 256);
+											//bit_array_reset(channel->played_patjump);
+										}
+#endif
+										channel->pat_loop_count = v;
+										sigrenderer->breakrow = channel->pat_loop_row;
+										if ((sigrenderer->sigdata->flags & (IT_WAS_AN_XM|IT_WAS_A_MOD)) == IT_WAS_AN_XM) {
+											/* For XM files, if a loop occurs by itself, keep breakrow set for when the pattern ends - fun bug in FT2! */
+											if ((sigrenderer->processrow|0xC00) < 0xFFFE) {
+												/* Infinite pattern loops are possible, so we check whether the pattern loop we're hitting now is earlier than the last one we hit. */
+												if (sigrenderer->processrow < channel->pat_loop_end_row)
+													sigrenderer->processorder = 0xFFFE; /* suspect infinite loop, so trigger loop callback */
+												else
+													sigrenderer->processorder = 0xFFFF; /* don't trigger loop callback */
+												channel->pat_loop_end_row = sigrenderer->processrow;
+												sigrenderer->processrow = 0xFFFF; /* special case: don't reset breakrow or pat_loop_end_row */
+											}
+										} else {
+											/* IT files do this regardless of other flow control effects seen here. */
+											sigrenderer->processorder = 0xFFFF; /* special case: don't trigger loop callback */
+											sigrenderer->processrow = 0xFFFE;
+										}
+										return 1;
+									} else if (--channel->pat_loop_count) {
+										/* Repeats remain after this decrement: jump
+										 * back again (same handling as above). */
+#ifdef BIT_ARRAY_BULLSHIT
+										if (channel->played_patjump_order == sigrenderer->order) {
+											bit_array_set(channel->played_patjump, sigrenderer->row);
+											bit_array_mask(sigrenderer->played, channel->played_patjump, channel->played_patjump_order * 256);
+											//bit_array_reset(channel->played_patjump);
+										}
+#endif
+										sigrenderer->breakrow = channel->pat_loop_row;
+										if ((sigrenderer->sigdata->flags & (IT_WAS_AN_XM|IT_WAS_A_MOD)) == IT_WAS_AN_XM) {
+											/* For XM files, if a loop occurs by itself, keep breakrow set for when the pattern ends - fun bug in FT2! */
+											if ((sigrenderer->processrow|0xC00) < 0xFFFE) {
+												/* Infinite pattern loops are possible, so we check whether the pattern loop we're hitting now is earlier than the last one we hit. */
+												if (sigrenderer->processrow < channel->pat_loop_end_row)
+													sigrenderer->processorder = 0xFFFE; /* suspect infinite loop, so trigger loop callback */
+												else
+													sigrenderer->processorder = 0xFFFF; /* don't trigger loop callback */
+												channel->pat_loop_end_row = sigrenderer->processrow;
+												sigrenderer->processrow = 0xFFFF; /* special case: don't reset breakrow or pat_loop_end_row */
+											}
+										} else {
+											/* IT files do this regardless of other flow control effects seen here. */
+											sigrenderer->processorder = 0xFFFF; /* special case: don't trigger loop callback */
+											sigrenderer->processrow = 0xFFFE;
+										}
+										return 1;
+									} else if ((sigrenderer->sigdata->flags & (IT_WAS_AN_XM|IT_WAS_A_MOD)) == IT_WAS_AN_XM) {
+										/* Loop finished (counter just reached zero), XM flavour. */
+										channel->pat_loop_end_row = 0;
+										// TODO
+										/* Findings:
+										- If a pattern loop completes successfully, and then the pattern terminates, then the next pattern will start on the row corresponding to the E60.
+										- If a pattern loop doesn't do any loops, and then the pattern terminates, then the next pattern will start on the first row.
+										- If a break appears to the left of the pattern loop, it jumps into the relevant position in the next pattern, and that's it.
+										- If a break appears to the right of the pattern loop, it jumps to the start of the next pattern, and that's it.
+										- If we jump, then effect a loop using an old E60, and then the pattern ends, the next pattern starts on the row corresponding to the E60.
+										- Theory: breakrow is not cleared when it's a pattern loop effect!
+										*/
+										if ((sigrenderer->processrow | 0xC00) < 0xFFFE) // I have no idea if this is correct or not - FT2 is so weird :(
+											sigrenderer->breakrow = channel->pat_loop_row; /* emulate bug in FT2 */
+									} else
+										/* Loop finished, other formats: the next loop
+										 * starts on the row after this one. */
+										channel->pat_loop_row = sigrenderer->processrow + 1;
+#ifdef BIT_ARRAY_BULLSHIT
+									/*channel->played_patjump_order |= 0x8000;*/
+									if (channel->played_patjump_order == sigrenderer->order) {
+										bit_array_destroy(channel->played_patjump);
+										channel->played_patjump = 0;
+										channel->played_patjump_order = 0xFFFE;
+									}
+									bit_array_clear(sigrenderer->played, sigrenderer->order * 256 + sigrenderer->row);
+#endif
+								}
+							}
+							break;
+						case IT_S_PATTERN_DELAY:
+							/* Row is processed 1 + (low nibble) times. */
+							sigrenderer->rowcount = 1 + (effectvalue & 15);
+							break;
+					}
+				}
+		}
+	}
+
+	return 0;
+}
+
+
+
+/* Resolves channel->sample (and channel->truenote) from the channel's current
+ * instrument and note.
+ *
+ * In instrument mode (IT_USE_INSTRUMENTS) the instrument's map_sample and
+ * map_note tables are consulted, provided the instrument number is in range
+ * and the note is below 120; in sample mode the instrument number is the
+ * sample number and the note is used as is.
+ *
+ * Guarantee: on return channel->sample is either 0 or the 1-based number of
+ * a sample that is in range, has IT_SAMPLE_EXISTS set and a non-zero
+ * C5_speed. Checking it for non-zero is therefore enough to know it is valid.
+ */
+static void instrument_to_sample(DUMB_IT_SIGDATA *sigdata, IT_CHANNEL *channel)
+{
+	if (!(sigdata->flags & IT_USE_INSTRUMENTS)) {
+		channel->sample = channel->instrument;
+		channel->truenote = channel->note;
+	} else if (channel->instrument >= 1 && channel->instrument <= sigdata->n_instruments && channel->note < 120) {
+		channel->sample = sigdata->instrument[channel->instrument-1].map_sample[channel->note];
+		channel->truenote = sigdata->instrument[channel->instrument-1].map_note[channel->note];
+	} else {
+		/* Instrument out of range or note not mappable. */
+		channel->sample = 0;
+	}
+
+	/* Reject anything that does not name a usable sample. */
+	if (channel->sample < 1
+	    || channel->sample > sigdata->n_samples
+	    || !(sigdata->sample[channel->sample-1].flags & IT_SAMPLE_EXISTS)
+	    || !sigdata->sample[channel->sample-1].C5_speed)
+		channel->sample = 0;
+}
+
+
+
+/* If the sample has both IT_SAMPLE_LOOP and IT_SAMPLE_SUS_LOOP set, makes a
+ * backwards-running resampler run forwards again (reflecting its position
+ * about sus_loop_end), then adds time_lost to the position and clears it. */
+static void fix_sample_looping(IT_PLAYING *playing)
+{
+	if ((playing->sample->flags & (IT_SAMPLE_LOOP | IT_SAMPLE_SUS_LOOP)) != (IT_SAMPLE_LOOP | IT_SAMPLE_SUS_LOOP))
+		return;
+	if (playing->resampler.dir < 0) {
+		playing->resampler.subpos ^= 65535;
+		playing->resampler.pos = (playing->sample->sus_loop_end << 1) - 1 - playing->resampler.pos;
+		playing->resampler.dir = 1;
+	}
+	playing->resampler.pos += playing->time_lost;
+	playing->time_lost = 0;
+}
+
+
+
+/* Re-triggers the voice already on this channel in place (no new voice is
+ * created). channel->playing is dereferenced unconditionally, so callers must
+ * pass a channel that has one. An envelope keeps its position only if the
+ * channel has a sample, the module uses instruments, the voice has not had
+ * sustain released, and that envelope of env_instrument has IT_ENVELOPE_CARRY. */
+static void it_compatible_gxx_retrigger(DUMB_IT_SIGDATA *sigdata, IT_CHANNEL *channel)
+{
+	int carry_vol = 0, carry_pan = 0, carry_pitch = 0;
+
+	if (channel->sample && (sigdata->flags & IT_USE_INSTRUMENTS) && !(channel->playing->flags & IT_PLAYING_SUSTAINOFF)) {
+		carry_vol = (channel->playing->env_instrument->volume_envelope.flags & IT_ENVELOPE_CARRY) != 0;
+		carry_pan = (channel->playing->env_instrument->pan_envelope.flags & IT_ENVELOPE_CARRY) != 0;
+		carry_pitch = (channel->playing->env_instrument->pitch_envelope.flags & IT_ENVELOPE_CARRY) != 0;
+	}
+	if (!carry_vol) {
+		channel->playing->volume_envelope.tick = 0;
+		channel->playing->volume_envelope.next_node = 0;
+	}
+	if (!carry_pan) {
+		channel->playing->pan_envelope.tick = 0;
+		channel->playing->pan_envelope.next_node = 0;
+	}
+	if (!carry_pitch) {
+		channel->playing->pitch_envelope.tick = 0;
+		channel->playing->pitch_envelope.next_node = 0;
+	}
+
+	/* Open question kept from the original: should IT_PLAYING_BACKGROUND really be cleared? (To be tested with a sustain-looped sample.) */
+	channel->playing->flags &= ~(IT_PLAYING_BACKGROUND | IT_PLAYING_SUSTAINOFF | IT_PLAYING_FADING | IT_PLAYING_DEAD);
+	channel->playing->fadeoutcount = 1024;
+	it_playing_update_resamplers(channel->playing);
+
+	/* Nothing was carried: point the voice's envelopes at the channel's current instrument. */
+	if (!carry_vol && !carry_pan && !carry_pitch && channel->sample && (sigdata->flags & IT_USE_INSTRUMENTS))
+		channel->playing->env_instrument = &sigdata->instrument[channel->instrument-1];
+}
+
+
+
+/* Note-off for one voice; a NULL voice is ignored. Enables the volume envelope, sets IT_PLAYING_BACKGROUND and IT_PLAYING_SUSTAINOFF,
+ * fixes up looping and the resamplers, and sets IT_PLAYING_FADING when the voice has an instrument whose volume envelope is off or looping. */
+static void it_note_off(IT_PLAYING *playing)
+{
+	if (!playing) return;
+	playing->flags |= IT_PLAYING_BACKGROUND | IT_PLAYING_SUSTAINOFF;
+	playing->enabled_envelopes |= IT_ENV_VOLUME;
+	fix_sample_looping(playing);
+	it_playing_update_resamplers(playing);
+	if (playing->instrument && (playing->instrument->volume_envelope.flags & (IT_ENVELOPE_ON | IT_ENVELOPE_LOOP_ON)) != IT_ENVELOPE_ON)
+		playing->flags |= IT_PLAYING_FADING;
+}
+
+
+
+/* XM key-off; does nothing when the channel has no voice. Zeroes
+ * channel->volume if the channel's instrument number is 0 or out of range, or
+ * if that instrument's volume envelope is not on; then sets SUSTAINOFF and
+ * FADING on the voice and calls it_playing_update_resamplers(). */
+static void xm_note_off(DUMB_IT_SIGDATA *sigdata, IT_CHANNEL *channel)
+{
+	if (!channel->playing) return;
+	if (channel->instrument == 0 || channel->instrument > sigdata->n_instruments || !(sigdata->instrument[channel->instrument-1].volume_envelope.flags & IT_ENVELOPE_ON))
+		channel->volume = 0;
+	channel->playing->flags |= IT_PLAYING_SUSTAINOFF | IT_PLAYING_FADING;
+	it_playing_update_resamplers(channel->playing);
+}
+
+
+/* Sets pe->next_node to the index of the first node of e (excluding the last)
+ * whose time is not before pe->tick, or to the last node when there is none.
+ * An envelope with no nodes now yields 0; the old unsigned loop bound wrapped
+ * to UINT_MAX in that case and could scan far beyond node_t. */
+static void recalculate_it_envelope_node(IT_PLAYING_ENVELOPE *pe, IT_ENVELOPE *e)
+{
+	int envpos = pe->tick;
+	int last = (int)e->n_nodes - 1;
+	int i;
+	if (last < 0) last = 0; /* empty envelope: nothing to search */
+	for (i = 0; i < last; ++i)
+		if (envpos <= e->node_t[i])
+			break;
+	pe->next_node = i;
+}
+
+/* Recomputes next_node for the volume, pan and pitch envelopes of a voice, each against the matching env_instrument envelope. */
+static void recalculate_it_envelope_nodes(IT_PLAYING *playing)
+{
+	recalculate_it_envelope_node(&playing->volume_envelope, &playing->env_instrument->volume_envelope);
+	recalculate_it_envelope_node(&playing->pan_envelope, &playing->env_instrument->pan_envelope); /* was wrongly matched with pitch_envelope */
+	recalculate_it_envelope_node(&playing->pitch_envelope, &playing->env_instrument->pitch_envelope);
+}
+
+/* Starts a new voice on `channel`; a voice already there first receives its New Note Action, and in instrument mode the voices in sigrenderer->playing[] that belong to the channel get the duplicate check. */
+static void it_retrigger_note(DUMB_IT_SIGRENDERER *sigrenderer, IT_CHANNEL *channel)
+{
+	int vol_env_tick = 0;
+	int pan_env_tick = 0;
+	int pitch_env_tick = 0;
+	/* The ticks above are taken from an outgoing voice and only used for envelopes that have IT_ENVELOPE_CARRY. */
+	DUMB_IT_SIGDATA *sigdata = sigrenderer->sigdata;
+	unsigned char nna = ~0;
+	int i, envelopes_copied = 0;
+	/* Choose the action for the current voice: note cut/off and other notes above 120 first, then a forced cut for voices with no instrument or flagged dead, then the channel override (anything but 0xFF), then the instrument's own setting. */
+	if (channel->playing) {
+		if (channel->note == IT_NOTE_CUT)
+			nna = NNA_NOTE_CUT;
+		else if (channel->note == IT_NOTE_OFF)
+			nna = NNA_NOTE_OFF;
+		else if (channel->note > 120)
+			nna = NNA_NOTE_FADE;
+		else if (!channel->playing->instrument || (channel->playing->flags & IT_PLAYING_DEAD))
+			nna = NNA_NOTE_CUT;
+		else if (channel->new_note_action != 0xFF)
+		{
+			nna = channel->new_note_action;
+		}
+		else
+			nna = channel->playing->instrument->new_note_action;
+		/* Save the envelope positions unless the old voice has already had sustain released. */
+		if (!(channel->playing->flags & IT_PLAYING_SUSTAINOFF))
+		{
+			if (nna != NNA_NOTE_CUT)
+				vol_env_tick = channel->playing->volume_envelope.tick;
+			pan_env_tick = channel->playing->pan_envelope.tick;
+			pitch_env_tick = channel->playing->pitch_envelope.tick;
+			envelopes_copied = 1;
+		}
+		/* Apply the action; any other nna value leaves the old voice untouched here. */
+		switch (nna) {
+			case NNA_NOTE_CUT:
+				channel->playing->declick_stage = 3;
+				break;
+			case NNA_NOTE_OFF:
+				it_note_off(channel->playing);
+				break;
+			case NNA_NOTE_FADE:
+				channel->playing->flags |= IT_PLAYING_BACKGROUND | IT_PLAYING_FADING;
+				break;
+		}
+	}
+
+	channel->new_note_action = 0xFF; /* the channel override applies to one retrigger only */
+	/* No sample, or a note above 120: stop here without starting a voice. */
+	if (channel->sample == 0 || channel->note > 120)
+		return;
+
+	channel->destnote = IT_NOTE_OFF;
+	/* Move the old voice into the first free slot of sigrenderer->playing[], if there is one. */
+	if (channel->playing) {
+		for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+			if (!sigrenderer->playing[i]) {
+				sigrenderer->playing[i] = channel->playing;
+				channel->playing = NULL;
+				break;
+			}
+		}
+		/* Duplicate check: act on voices in playing[] that belong to this channel and match the new note. */
+		if (sigrenderer->sigdata->flags & IT_USE_INSTRUMENTS)
+		{
+			for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+				IT_PLAYING * playing = sigrenderer->playing[i];
+				if (playing && playing->channel == channel && playing->instrument->dup_check_type) {
+					int match = 1;
+					switch (playing->instrument->dup_check_type)
+					{
+					case DCT_NOTE:
+						match = (channel->truenote == playing->note); /* falls through: sample and instrument must match too */
+					case DCT_SAMPLE:
+						match = match && (channel->sample == playing->sampnum); /* falls through: instrument must match too */
+					case DCT_INSTRUMENT:
+						match = match && (channel->instrument == playing->instnum);
+						break;
+					}
+
+					if (match)
+					{
+						switch (playing->instrument->dup_check_action)
+						{
+						case DCA_NOTE_CUT:
+							playing->declick_stage = 3;
+							if (channel->playing == playing) channel->playing = NULL;
+							break;
+						case DCA_NOTE_OFF:
+							if (!(playing->flags & IT_PLAYING_SUSTAINOFF))
+								it_note_off(playing);
+							break;
+						case DCA_NOTE_FADE:
+							playing->flags |= IT_PLAYING_BACKGROUND | IT_PLAYING_FADING;
+							break;
+						}
+					}
+				}
+			}
+		}
+
+/** WARNING - come up with some more heuristics for replacing old notes */
+#if 0
+		if (channel->playing) {
+			for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+				if (sigrenderer->playing[i]->flags & IT_PLAYING_BACKGROUND) {
+					write_seqtime();
+					sequence_c(SEQUENCE_STOP_SIGNAL);
+					sequence_c(i);
+					channel->VChannel = &module->VChannel[i];
+					break;
+				}
+			}
+		}
+#endif
+	}
+	/* Still non-NULL only if the old voice could not be moved to a free slot above: free it. */
+	if (channel->playing)
+		free_playing(sigrenderer, channel->playing);
+
+	channel->playing = new_playing(sigrenderer);
+
+	if (!channel->playing)
+		return;
+	/* If no ticks were saved from the old voice, take them from the first voice in playing[] on this channel that does not have SUSTAINOFF. */
+	if (!envelopes_copied && sigdata->flags & IT_USE_INSTRUMENTS) {
+		for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+			IT_PLAYING * playing = sigrenderer->playing[i];
+			if (!playing || playing->channel != channel) continue;
+			if (playing->flags & IT_PLAYING_SUSTAINOFF) continue;
+			if (nna != NNA_NOTE_CUT)
+				vol_env_tick = playing->volume_envelope.tick;
+			pan_env_tick = playing->pan_envelope.tick;
+			pitch_env_tick = playing->pitch_envelope.tick;
+			envelopes_copied = 1;
+			break;
+		}
+	}				
+	/* Initialise the new voice. */
+	channel->playing->flags = 0;
+	channel->playing->resampling_quality = sigrenderer->resampling_quality;
+	channel->playing->channel = channel;
+	channel->playing->sample = &sigdata->sample[channel->sample-1];
+	if (sigdata->flags & IT_USE_INSTRUMENTS)
+		channel->playing->instrument = &sigdata->instrument[channel->instrument-1];
+	else
+		channel->playing->instrument = NULL;
+	channel->playing->env_instrument = channel->playing->instrument;
+	channel->playing->sampnum = channel->sample;
+	channel->playing->instnum = channel->instrument;
+	channel->playing->declick_stage = 0;
+	channel->playing->channel_volume = channel->channelvolume;
+	channel->playing->note = channel->truenote;
+	channel->playing->enabled_envelopes = 0;
+	channel->playing->volume_offset = 0;
+	channel->playing->panning_offset = 0;
+	//channel->playing->output = channel->output;
+	if (sigdata->flags & IT_USE_INSTRUMENTS) {
+		IT_PLAYING * playing = channel->playing;
+		IT_INSTRUMENT * instrument = playing->instrument;
+		if (instrument->volume_envelope.flags & IT_ENVELOPE_ON) playing->enabled_envelopes |= IT_ENV_VOLUME;
+		if (instrument->pan_envelope.flags & IT_ENVELOPE_ON) playing->enabled_envelopes |= IT_ENV_PANNING;
+		if (instrument->pitch_envelope.flags & IT_ENVELOPE_ON) playing->enabled_envelopes |= IT_ENV_PITCH;
+		if (instrument->random_volume) playing->volume_offset = (rand() % (instrument->random_volume * 2 + 1)) - instrument->random_volume; /* in [-random_volume, +random_volume] */
+		if (instrument->random_pan) playing->panning_offset = (rand() % (instrument->random_pan * 2 + 1)) - instrument->random_pan; /* in [-random_pan, +random_pan] */
+		//if (instrument->output) playing->output = instrument->output;
+	}
+	channel->playing->filter_cutoff = 127;
+	channel->playing->filter_resonance = 0;
+	channel->playing->true_filter_cutoff = 127 << 8;
+	channel->playing->true_filter_resonance = 0;
+	channel->playing->vibrato_speed = 0;
+	channel->playing->vibrato_depth = 0;
+	channel->playing->vibrato_n = 0;
+	channel->playing->vibrato_time = 0;
+	channel->playing->vibrato_waveform = channel->vibrato_waveform;
+	channel->playing->tremolo_speed = 0;
+	channel->playing->tremolo_depth = 0;
+	channel->playing->tremolo_time = 0;
+	channel->playing->tremolo_waveform = channel->tremolo_waveform;
+	channel->playing->panbrello_speed = 0;
+	channel->playing->panbrello_depth = 0;
+	channel->playing->panbrello_time = 0;
+	channel->playing->panbrello_waveform = channel->panbrello_waveform;
+	channel->playing->panbrello_random = 0;
+	channel->playing->sample_vibrato_time = 0;
+	channel->playing->sample_vibrato_waveform = channel->playing->sample->vibrato_waveform;
+	channel->playing->sample_vibrato_depth = 0;
+	channel->playing->slide = 0;
+	channel->playing->finetune = channel->playing->sample->finetune;
+	/* Envelope ticks: the saved value when one was copied and the envelope has IT_ENVELOPE_CARRY, otherwise 0. */
+	if (sigdata->flags & IT_USE_INSTRUMENTS)
+	{
+		if (envelopes_copied && channel->playing->env_instrument->volume_envelope.flags & IT_ENVELOPE_CARRY) {
+			channel->playing->volume_envelope.tick = vol_env_tick;
+		} else {
+			channel->playing->volume_envelope.tick = 0;
+		}
+		if (envelopes_copied && channel->playing->env_instrument->pan_envelope.flags & IT_ENVELOPE_CARRY) {
+			channel->playing->pan_envelope.tick = pan_env_tick;
+		} else {
+			channel->playing->pan_envelope.tick = 0;
+		}
+		if (envelopes_copied && channel->playing->env_instrument->pitch_envelope.flags & IT_ENVELOPE_CARRY) {
+			channel->playing->pitch_envelope.tick = pitch_env_tick;
+		} else {
+			channel->playing->pitch_envelope.tick = 0;
+		}
+		recalculate_it_envelope_nodes(channel->playing);
+	}
+	channel->playing->fadeoutcount = 1024;
+	it_reset_filter_state(&channel->playing->filter_state[0]);
+	it_reset_filter_state(&channel->playing->filter_state[1]);
+	it_playing_reset_resamplers(channel->playing, 0);
+
+	/** WARNING - is everything initialised? */
+}
+
+
+
+/* Loads the channel's default volume, pan and filter settings; a no-op when
+ * channel->sample is 0. channel->volume always comes from the sample. With
+ * IT_WAS_AN_XM the sample pan goes to truepan (skipped when IT_WAS_A_MOD is
+ * also set) and nothing else is touched. Otherwise a sample pan of 128..192
+ * sets channel->pan; failing that, when IT_USE_INSTRUMENTS is set the
+ * instrument supplies the pan and filter defaults. */
+static void get_default_volpan(DUMB_IT_SIGDATA *sigdata, IT_CHANNEL *channel)
+{
+	int pan;
+
+	if (!channel->sample)
+		return;
+	channel->volume = sigdata->sample[channel->sample-1].default_volume;
+	pan = sigdata->sample[channel->sample-1].default_pan;
+
+	if (sigdata->flags & IT_WAS_AN_XM) {
+		if (!(sigdata->flags & IT_WAS_A_MOD))
+			channel->truepan = 32 + pan*64;
+	} else if (pan >= 128 && pan <= 192) {
+		channel->pan = pan - 128;
+	} else if (sigdata->flags & IT_USE_INSTRUMENTS) {
+		IT_INSTRUMENT *instrument = &sigdata->instrument[channel->instrument-1];
+		/* Pan applies when <= 64; filter values >= 128 are applied with the 128 offset removed. */
+		if (instrument->default_pan <= 64)
+			channel->pan = instrument->default_pan;
+		if (instrument->filter_cutoff >= 128)
+			channel->filter_cutoff = instrument->filter_cutoff - 128;
+		if (instrument->filter_resonance >= 128)
+			channel->filter_resonance = instrument->filter_resonance - 128;
+	}
+}
+
+
+
+/* Sets channel->truepan to channel->pan << IT_ENVELOPE_SHIFT and, when the channel has a sample, the pan is not surround and IT_USE_INSTRUMENTS is set, offsets it by (note - pp_centre) * pp_separation, clamped to 0..64 << IT_ENVELOPE_SHIFT. */
+static void get_true_pan(DUMB_IT_SIGDATA *sigdata, IT_CHANNEL *channel)
+{
+	channel->truepan = channel->pan << IT_ENVELOPE_SHIFT;
+	if (channel->sample && !IT_IS_SURROUND_SHIFTED(channel->truepan) && (sigdata->flags & IT_USE_INSTRUMENTS)) {
+		IT_INSTRUMENT *instrument = &sigdata->instrument[channel->instrument-1];
+		int truepan = channel->truepan;
+		truepan += (channel->note - instrument->pp_centre) * instrument->pp_separation * (1 << (IT_ENVELOPE_SHIFT - 3)); /* multiply, not <<: the offset can be negative and left-shifting a negative int is undefined */
+		channel->truepan = (unsigned short)MID(0, truepan, 64 << IT_ENVELOPE_SHIFT);
+	}
+}
+
+
+
+/* Applies the volume-column commands handled here to the entry's channel.
+ * Nothing happens unless entry->mask has IT_ENTRY_VOLPAN. By volpan value:
+ *     0- 84  ignored here (volume, fine volume slide up/down)
+ *    85- 94  volume slide up
+ *    95-104  volume slide down
+ *   105-114  portamento down
+ *   115-124  portamento up
+ *   125-202  ignored here (pan, tone portamento)
+ *   203-212  vibrato depth
+ *   213+     ignored here
+ */
+static void post_process_it_volpan(DUMB_IT_SIGRENDERER *sigrenderer, IT_ENTRY *entry)
+{
+	IT_CHANNEL *channel = &sigrenderer->channel[(int)entry->channel];
+	unsigned char v;
+	int code;
+
+	if (!(entry->mask & IT_ENTRY_VOLPAN))
+		return;
+	code = entry->volpan;
+
+	if (code >= 85 && code <= 104) {
+		/* Volume slide; a parameter of 0 repeats lastvolslide.
+		 * Up is like effect Dx0 (x == code - 85), down like D0x (x == code - 95). */
+		int up = (code <= 94);
+		v = up ? code - 85 : code - 95;
+		if (v == 0)
+			v = channel->lastvolslide;
+		channel->lastvolslide = v;
+		if (up)
+			channel->volslide += v;
+		else
+			channel->volslide -= v;
+	} else if (code >= 105 && code <= 124) {
+		/* Portamento; a parameter of 0 repeats lastEF. */
+		int up = (code >= 115);
+		v = (up ? code - 115 : code - 105) << 2;
+		if (v == 0)
+			v = channel->lastEF;
+		channel->lastEF = v;
+		if (up)
+			channel->portamento += v << 4;
+		else
+			channel->portamento -= v << 4;
+	} else if (code >= 203 && code <= 212) {
+		/* Vibrato depth. Unaffected by IT_OLD_EFFECTS; with a parameter of 0 the remembered depth
+		 * is used as is, so any doubling applied earlier by Hxy in the effect column is preserved. */
+		v = code - 203;
+		if (v != 0) {
+			v <<= 2;
+			channel->lastHdepth = v;
+		} else
+			v = channel->lastHdepth;
+		if (channel->playing) {
+			channel->playing->vibrato_speed = channel->lastHspeed;
+			channel->playing->vibrato_depth = v;
+			channel->playing->vibrato_n++;
+		}
+	}
+}
+
+
+
+/* Handles one MIDI byte for `channel`. If sigrenderer->callbacks->midi is set
+ * it is called first, and a nonzero return ends processing of the byte.
+ * The channel number passed to the callback is its index in sigrenderer->channel.
+ * Otherwise the byte drives channel->midi_state:
+ *   state 4     a byte below 0x80 becomes filter_resonance; state returns to 0
+ *   state 3     a byte below 0x80 becomes filter_cutoff; state returns to 0
+ *   state 2     byte 0 -> state 3 (cutoff), byte 1 -> state 4 (resonance),
+ *               any other byte -> state 0
+ *   otherwise   0xF0 increments the state; 0xFA, 0xFC and 0xFF reset the
+ *               filter cutoff and resonance of every channel; every byte
+ *               except 0xF0 sets the state to 0
+ */
+static void it_send_midi(DUMB_IT_SIGRENDERER *sigrenderer, IT_CHANNEL *channel, unsigned char midi_byte)
+{
+	int i;
+
+	if (sigrenderer->callbacks->midi && (*sigrenderer->callbacks->midi)(sigrenderer->callbacks->midi_data, (int)(channel - sigrenderer->channel), midi_byte))
+		return;
+
+	if (channel->midi_state == 4) {
+		if (midi_byte < 0x80) channel->filter_resonance = midi_byte;
+		channel->midi_state = 0;
+	} else if (channel->midi_state == 3) {
+		if (midi_byte < 0x80) channel->filter_cutoff = midi_byte;
+		channel->midi_state = 0;
+	} else if (channel->midi_state == 2) {
+		if (midi_byte == 0)
+			channel->midi_state = 3;
+		else if (midi_byte == 1)
+			channel->midi_state = 4;
+		else
+			channel->midi_state = 0;
+	} else if (midi_byte == 0xF0) {
+		/* Still counting the initial F0 bytes. */
+		channel->midi_state++;
+	} else {
+		if (midi_byte == 0xFA || midi_byte == 0xFC || midi_byte == 0xFF) {
+			/* Reset the filter parameters of all channels. */
+			for (i = 0; i < DUMB_IT_N_CHANNELS; i++) {
+				sigrenderer->channel[i].filter_cutoff = 127;
+				sigrenderer->channel[i].filter_resonance = 0;
+				/* Open question kept from the original: should channel[i].playing->filter_* be reset here too? */
+			}
+		}
+		channel->midi_state = 0;
+	}
+}
+
+
+
+/* Computes pe->value (node y shifted left by IT_ENVELOPE_SHIFT). With next_node <= 0 or >= n_nodes it is
+ * the first or last node's y; otherwise it is interpolated linearly at pe->tick between nodes next_node-1
+ * and next_node, or equals the earlier node's y when both nodes share the same time. */
+static void xm_envelope_calculate_value(IT_ENVELOPE *envelope, IT_PLAYING_ENVELOPE *pe)
+{
+	int y0, y1, t0, t1;
+	if (pe->next_node <= 0 || pe->next_node >= envelope->n_nodes) {
+		int edge = (pe->next_node <= 0) ? 0 : envelope->n_nodes-1;
+		pe->value = envelope->node_y[edge] << IT_ENVELOPE_SHIFT;
+		return;
+	}
+	t0 = envelope->node_t[pe->next_node-1];
+	t1 = envelope->node_t[pe->next_node];
+	y0 = envelope->node_y[pe->next_node-1] << IT_ENVELOPE_SHIFT;
+	if (t0 == t1)
+		pe->value = y0;
+	else {
+		y1 = envelope->node_y[pe->next_node] << IT_ENVELOPE_SHIFT;
+		pe->value = y0 + (y1 - y0) * ((int)pe->tick - t0) / (t1 - t0);
+	}
+}
+
+
+
+extern const char xm_convert_vibrato[]; /* declared here only; its definition is not in this part of the file */
+/* Table of IT_VIBRATO_* waveform codes; presumably indexed by a MOD waveform number 0-3 (its users are outside this view - TODO confirm). */
+const char mod_convert_vibrato[] = {
+	IT_VIBRATO_SINE,
+	IT_VIBRATO_RAMP_UP, /* this will be inverted by IT_OLD_EFFECTS */
+	IT_VIBRATO_XM_SQUARE,
+	IT_VIBRATO_XM_SQUARE
+};
+
+/* Returns 1 if a callback caused termination of playback. */
+static int process_effects(DUMB_IT_SIGRENDERER *sigrenderer, IT_ENTRY *entry, int ignore_cxx)
+{
+	DUMB_IT_SIGDATA *sigdata = sigrenderer->sigdata;
+	IT_PLAYING *playing;
+	int i;
+
+	IT_CHANNEL *channel = &sigrenderer->channel[(int)entry->channel];
+
+	if (entry->mask & IT_ENTRY_EFFECT) {
+		switch (entry->effect) {
+/*
+Notes about effects (as compared to other module formats)
+
+C               This is now in *HEX*. (Used to be in decimal in ST3)
+E/F/G/H/U       You need to check whether the song uses Amiga/Linear slides.
+H/U             Vibrato in Impulse Tracker is two times finer than in
+                any other tracker and is updated EVERY tick.
+                If "Old Effects" is *ON*, then the vibrato is played in the
+                normal manner (every non-row tick and normal depth)
+E/F/G           These commands ALL share the same memory.
+Oxx             Offsets to samples are to the 'xx00th' SAMPLE. (ie. for
+                16 bit samples, the offset is xx00h*2)
+                Oxx past the sample end will be ignored, unless "Old Effects"
+                is ON, in which case the Oxx will play from the end of the
+                sample.
+Yxy             This uses a table 4 times larger (hence 4 times slower) than
+                vibrato or tremelo. If the waveform is set to random, then
+                the 'speed' part of the command is interpreted as a delay.
+*/
+			case IT_SET_SPEED:
+				if (entry->effectvalue)
+				{
+					/*if (entry->effectvalue == 255)
+						if (sigrenderer->callbacks->xm_speed_zero && (*sigrenderer->callbacks->xm_speed_zero)(sigrenderer->callbacks->xm_speed_zero_data))
+							return 1;*/
+					if (sigdata->flags & IT_WAS_AN_STM) {
+						int n = entry->effectvalue;
+						if (n >= 32) {
+							sigrenderer->tick = sigrenderer->speed = n;
+						}
+					} else {
+						sigrenderer->tick = sigrenderer->speed = entry->effectvalue;
+					}
+				}
+				else if ((sigdata->flags & (IT_WAS_AN_XM|IT_WAS_A_MOD)) == IT_WAS_AN_XM) {
+#ifdef BIT_ARRAY_BULLSHIT
+					bit_array_set(sigrenderer->played, sigrenderer->order * 256 + sigrenderer->row);
+#endif
+					sigrenderer->speed = 0;
+					if (sigrenderer->callbacks->xm_speed_zero && (*sigrenderer->callbacks->xm_speed_zero)(sigrenderer->callbacks->xm_speed_zero_data))
+						return 1;
+				}
+				break;
+
+			case IT_BREAK_TO_ROW:
+				if (ignore_cxx) break;
+				sigrenderer->breakrow = entry->effectvalue;
+				/* XXX jump and break on the same row */
+				if ( ( ( sigrenderer->processrow | 0xC00 ) == 0xFFFE ) &&
+					! ( sigrenderer->processrow & 0x400 ) ) {
+					sigrenderer->processrow = 0xFFFE & ~0xC00;
+				} else {
+					sigrenderer->processorder = sigrenderer->order;
+					sigrenderer->processrow = 0xFFFE & ~0x800;
+				}
+				break;
+
+			case IT_VOLSLIDE_VIBRATO:
+				for (i = -1; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+					if (i < 0) playing = channel->playing;
+					else {
+						playing = sigrenderer->playing[i];
+						if (!playing || playing->channel != channel) continue;
+					}
+					if (playing) {
+						playing->vibrato_speed = channel->lastHspeed;
+						playing->vibrato_depth = channel->lastHdepth;
+						playing->vibrato_n++;
+					}
+				}
+				/* Fall through and process volume slide. */
+			case IT_VOLUME_SLIDE:
+			case IT_VOLSLIDE_TONEPORTA:
+				/* The tone portamento component is handled elsewhere. */
+				{
+					unsigned char v = entry->effectvalue;
+					if (!(sigdata->flags & IT_WAS_A_MOD)) {
+						if (v == 0)
+							v = channel->lastDKL;
+						channel->lastDKL = v;
+					}
+					if (!(sigdata->flags & IT_WAS_AN_XM)) {
+						int clip = (sigdata->flags & IT_WAS_AN_S3M) ? 63 : 64;
+						if ((v & 0x0F) == 0x0F) {
+							if (!(v & 0xF0)) {
+								channel->volslide = -15;
+								channel->volume -= 15;
+								if (channel->volume > clip) channel->volume = 0;
+							} else {
+								channel->volume += v >> 4;
+								if (channel->volume > clip) channel->volume = clip;
+							}
+						} else if ((v & 0xF0) == 0xF0) {
+							if (!(v & 0x0F)) {
+								channel->volslide = 15;
+								channel->volume += 15;
+								if (channel->volume > clip) channel->volume = clip;
+							} else {
+								channel->volume -= v & 15;
+								if (channel->volume > clip) channel->volume = 0;
+							}
+						} else if (!(v & 0x0F)) {
+							channel->volslide = v >> 4;
+						} else {
+							channel->volslide = -(v & 15);
+						}
+					} else {
+						if ((v & 0x0F) == 0) { /* Dx0 */
+							channel->volslide = v >> 4;
+						} else if ((v & 0xF0) == 0) { /* D0x */
+							channel->volslide = -v;
+						} else if ((v & 0x0F) == 0x0F) { /* DxF */
+							channel->volume += v >> 4;
+							if (channel->volume > 64) channel->volume = 64;
+						} else if ((v & 0xF0) == 0xF0) { /* DFx */
+							channel->volume -= v & 15;
+							if (channel->volume > 64) channel->volume = 0;
+						}
+					}
+				}
+				break;
+			case IT_XM_FINE_VOLSLIDE_DOWN:
+				{
+					unsigned char v = entry->effectvalue;
+					if (v == 0)
+						v = channel->xm_lastEB;
+					channel->xm_lastEB = v;
+					channel->volume -= v;
+					if (channel->volume > 64) channel->volume = 0;
+				}
+				break;
+			case IT_XM_FINE_VOLSLIDE_UP:
+				{
+					unsigned char v = entry->effectvalue;
+					if (v == 0)
+						v = channel->xm_lastEA;
+					channel->xm_lastEA = v;
+					channel->volume += v;
+					if (channel->volume > 64) channel->volume = 64;
+				}
+				break;
+			case IT_PORTAMENTO_DOWN:
+				{
+					unsigned char v = entry->effectvalue;
+					if (sigdata->flags & (IT_WAS_AN_XM|IT_WAS_A_669)) {
+						if (!(sigdata->flags & IT_WAS_A_MOD)) {
+							if (v == 0xF0)
+								v |= channel->xm_lastE2;
+							else if (v >= 0xF0)
+								channel->xm_lastE2 = v & 15;
+							else if (v == 0xE0)
+								v |= channel->xm_lastX2;
+							else
+								channel->xm_lastX2 = v & 15;
+						}
+					} else if (sigdata->flags & IT_WAS_AN_S3M) {
+						if (v == 0)
+							v = channel->lastDKL;
+						channel->lastDKL = v;
+					} else {
+						if (v == 0)
+							v = channel->lastEF;
+						channel->lastEF = v;
+					}
+					for (i = -1; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+						if (i < 0) playing = channel->playing;
+						else {
+							playing = sigrenderer->playing[i];
+							if (!playing || playing->channel != channel) continue;
+						}
+						if (playing) {
+							if ((v & 0xF0) == 0xF0)
+								playing->slide -= (v & 15) << 4;
+							else if ((v & 0xF0) == 0xE0)
+								playing->slide -= (v & 15) << 2;
+							else if (i < 0 && sigdata->flags & IT_WAS_A_669)
+								channel->portamento -= v << 3;
+							else if (i < 0)
+								channel->portamento -= v << 4;
+						}
+					}
+				}
+				break;
+			case IT_PORTAMENTO_UP:
+				{
+					unsigned char v = entry->effectvalue;
+					if (sigdata->flags & (IT_WAS_AN_XM|IT_WAS_A_669)) {
+						if (!(sigdata->flags & IT_WAS_A_MOD)) {
+							if (v == 0xF0)
+								v |= channel->xm_lastE1;
+							else if (v >= 0xF0)
+								channel->xm_lastE1 = v & 15;
+							else if (v == 0xE0)
+								v |= channel->xm_lastX1;
+							else
+								channel->xm_lastX1 = v & 15;
+						}
+					} else if (sigdata->flags & IT_WAS_AN_S3M) {
+						if (v == 0)
+							v = channel->lastDKL;
+						channel->lastDKL = v;
+					} else {
+						if (v == 0)
+							v = channel->lastEF;
+						channel->lastEF = v;
+					}
+					for (i = -1; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+						if (i < 0) playing = channel->playing;
+						else {
+							playing = sigrenderer->playing[i];
+							if (!playing || playing->channel != channel) continue;
+						}
+						if (playing) {
+							if ((v & 0xF0) == 0xF0)
+								playing->slide += (v & 15) << 4;
+							else if ((v & 0xF0) == 0xE0)
+								playing->slide += (v & 15) << 2;
+							else if (i < 0 && sigdata->flags & IT_WAS_A_669)
+								channel->portamento += v << 3;
+							else if (i < 0)
+								channel->portamento += v << 4;
+						}
+					}
+				}
+				break;
+			case IT_XM_PORTAMENTO_DOWN:
+				{
+					unsigned char v = entry->effectvalue;
+					if (!(sigdata->flags & IT_WAS_A_MOD)) {
+						if (v == 0)
+							v = channel->lastJ;
+						channel->lastJ = v;
+					}
+					if (channel->playing)
+						channel->portamento -= v << 4;
+				}
+				break;
+			case IT_XM_PORTAMENTO_UP:
+				{
+					unsigned char v = entry->effectvalue;
+					if (!(sigdata->flags & IT_WAS_A_MOD)) {
+						if (v == 0)
+							v = channel->lastEF;
+						channel->lastEF = v;
+					}
+					if (channel->playing)
+						channel->portamento += v << 4;
+				}
+				break;
+			case IT_XM_KEY_OFF:
+				channel->key_off_count = entry->effectvalue;
+				if (!channel->key_off_count) xm_note_off(sigdata, channel);
+				break;
+			case IT_VIBRATO:
+				{
+					if (entry->effectvalue || !(sigdata->flags & IT_WAS_A_669)) {
+						unsigned char speed = entry->effectvalue >> 4;
+						unsigned char depth = entry->effectvalue & 15;
+						if (speed == 0)
+							speed = channel->lastHspeed;
+						channel->lastHspeed = speed;
+						if (depth == 0)
+							depth = channel->lastHdepth;
+						else {
+							if (sigdata->flags & IT_OLD_EFFECTS && !(sigdata->flags & IT_WAS_A_MOD))
+								depth <<= 3;
+							else
+								depth <<= 2;
+							channel->lastHdepth = depth;
+						}
+						for (i = -1; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+							if (i < 0) playing = channel->playing;
+							else {
+								playing = sigrenderer->playing[i];
+								if (!playing || playing->channel != channel) continue;
+							}
+							if (playing) {
+								playing->vibrato_speed = speed;
+								playing->vibrato_depth = depth;
+								playing->vibrato_n++;
+							}
+						}
+					}
+				}
+				break;
+			case IT_TREMOR:
+				{
+					unsigned char v = entry->effectvalue;
+					if (v == 0) {
+						if (sigdata->flags & IT_WAS_AN_S3M)
+							v = channel->lastDKL;
+						else
+							v = channel->lastI;
+					}
+					else if (!(sigdata->flags & IT_OLD_EFFECTS)) {
+						if (v & 0xF0) v -= 0x10;
+						if (v & 0x0F) v -= 0x01;
+					}
+					if (sigdata->flags & IT_WAS_AN_S3M)
+						channel->lastDKL = v;
+					else
+						channel->lastI = v;
+					channel->tremor_time |= 128;
+				}
+				update_tremor(channel);
+				break;
+			case IT_ARPEGGIO:
+				{
+					unsigned char v = entry->effectvalue;
+					/* XM files have no memory for arpeggio (000 = no effect)
+					 * and we use lastJ for portamento down instead.
+					 */
+					if (!(sigdata->flags & IT_WAS_AN_XM)) {
+						if (sigdata->flags & IT_WAS_AN_S3M) {
+							if (v == 0)
+								v = channel->lastDKL;
+							channel->lastDKL = v;
+						} else {
+							if (v == 0)
+								v = channel->lastJ;
+							channel->lastJ = v;
+						}
+					}
+					channel->arpeggio_offsets[0] = 0;
+					channel->arpeggio_offsets[1] = (v & 0xF0) >> 4;
+					channel->arpeggio_offsets[2] = (v & 0x0F);
+					channel->arpeggio_table = (const unsigned char *)(((sigdata->flags & (IT_WAS_AN_XM|IT_WAS_A_MOD))==IT_WAS_AN_XM) ? &arpeggio_xm : &arpeggio_mod);
+				}
+				break;
+			case IT_SET_CHANNEL_VOLUME:
+				if (sigdata->flags & IT_WAS_AN_XM)
+					channel->volume = MIN(entry->effectvalue, 64);
+				else if (entry->effectvalue <= 64)
+					channel->channelvolume = entry->effectvalue;
+#ifdef VOLUME_OUT_OF_RANGE_SETS_MAXIMUM
+				else
+					channel->channelvolume = 64;
+#endif
+				if (channel->playing)
+					channel->playing->channel_volume = channel->channelvolume;
+				break;
+			case IT_CHANNEL_VOLUME_SLIDE:
+				{
+					unsigned char v = entry->effectvalue;
+					if (v == 0)
+						v = channel->lastN;
+					channel->lastN = v;
+					if ((v & 0x0F) == 0) { /* Nx0 */
+						channel->channelvolslide = v >> 4;
+					} else if ((v & 0xF0) == 0) { /* N0x */
+						channel->channelvolslide = -v;
+					} else {
+						if ((v & 0x0F) == 0x0F) { /* NxF */
+							channel->channelvolume += v >> 4;
+							if (channel->channelvolume > 64) channel->channelvolume = 64;
+						} else if ((v & 0xF0) == 0xF0) { /* NFx */
+							channel->channelvolume -= v & 15;
+							if (channel->channelvolume > 64) channel->channelvolume = 0;
+						} else
+							break;
+						if (channel->playing)
+							channel->playing->channel_volume = channel->channelvolume;
+					}
+				}
+				break;
+			case IT_SET_SAMPLE_OFFSET:
+				{
+					unsigned char v = entry->effectvalue;
+					/*if (sigdata->flags & IT_WAS_A_MOD) {
+						if (v == 0) break;
+					} else*/ {
+						if (v == 0)
+							v = channel->lastO;
+						channel->lastO = v;
+					}
+					/* Note: we set the offset even if tone portamento is
+					 * specified. Impulse Tracker does the same.
+					 */
+					if (entry->mask & IT_ENTRY_NOTE) {
+						if (channel->playing) {
+							int offset = ((int)channel->high_offset << 16) | ((int)v << 8);
+							IT_PLAYING *playing = channel->playing;
+							IT_SAMPLE *sample = playing->sample;
+							int end;
+							if ((sample->flags & IT_SAMPLE_SUS_LOOP) && !(playing->flags & IT_PLAYING_SUSTAINOFF))
+								end = sample->sus_loop_end;
+							else if (sample->flags & IT_SAMPLE_LOOP)
+								end = sample->loop_end;
+							else {
+								end = sample->length;
+								if ( sigdata->flags & IT_WAS_PROCESSED && end > 64 ) // XXX bah damn LPC and edge case modules
+									end -= 64;
+							}
+							if ((sigdata->flags & IT_WAS_A_PTM) && (sample->flags & IT_SAMPLE_16BIT))
+								offset >>= 1;
+							if (offset < end) {
+								it_playing_reset_resamplers(playing, offset);
+								playing->declick_stage = 0;
+							} else if (sigdata->flags & IT_OLD_EFFECTS) {
+								it_playing_reset_resamplers(playing, end);
+								playing->declick_stage = 0;
+							}
+						}
+					}
+				}
+				break;
+			case IT_PANNING_SLIDE:
+				/** JULIEN: guess what? the docs are wrong! (how unusual ;)
+				 * Pxy seems to memorize its previous value... and there
+				 * might be other mistakes like that... (sigh!)
+				 */
+				/** ENTHEH: umm... but... the docs say that Pxy memorises its
+				 * value... don't they? :o
+				 */
+				{
+					unsigned char v = entry->effectvalue;
+					int p = channel->truepan;
+					if (sigdata->flags & IT_WAS_AN_XM)
+					{
+						if (IT_IS_SURROUND(channel->pan))
+						{
+							channel->pan = 32;
+							p = 32 + 128 * 64;
+						}
+						p >>= 6;
+					}
+					else {
+						if (IT_IS_SURROUND(channel->pan)) p = 32 << 8;
+						p = (p + 128) >> 8;
+						channel->pan = p;
+					}
+					if (v == 0)
+						v = channel->lastP;
+					channel->lastP = v;
+					if ((v & 0x0F) == 0) { /* Px0 */
+						channel->panslide = -(v >> 4);
+					} else if ((v & 0xF0) == 0) { /* P0x */
+						channel->panslide = v;
+					} else if ((v & 0x0F) == 0x0F) { /* PxF */
+						p -= v >> 4;
+					} else if ((v & 0xF0) == 0xF0) { /* PFx */
+						p += v & 15;
+					}
+					if (sigdata->flags & IT_WAS_AN_XM)
+						channel->truepan = 32 + MID(0, p, 255) * 64;
+					else {
+						if (p < 0) p = 0;
+						else if (p > 64) p = 64;
+						channel->pan = p;
+						channel->truepan = p << 8;
+					}
+				}
+				break;
+			case IT_RETRIGGER_NOTE:
+				{
+					unsigned char v = entry->effectvalue;
+					if (sigdata->flags & IT_WAS_AN_XM) {
+						if ((v & 0x0F) == 0) v |= channel->lastQ & 0x0F;
+						if ((v & 0xF0) == 0) v |= channel->lastQ & 0xF0;
+						channel->lastQ = v;
+					} else if (sigdata->flags & IT_WAS_AN_S3M) {
+						if (v == 0)
+							v = channel->lastDKL;
+						channel->lastDKL = v;
+					} else {
+						if (v == 0)
+							v = channel->lastQ;
+						channel->lastQ = v;
+					}
+					if ((v & 0x0F) == 0) v |= 0x01;
+					channel->retrig = v;
+					if (entry->mask & IT_ENTRY_NOTE) {
+						channel->retrig_tick = v & 0x0F;
+						/* Emulate a bug */
+						if (sigdata->flags & IT_WAS_AN_XM)
+							update_retrig(sigrenderer, channel);
+					} else
+						update_retrig(sigrenderer, channel);
+				}
+				break;
+			case IT_XM_RETRIGGER_NOTE:
+				channel->retrig_tick = channel->xm_retrig = entry->effectvalue;
+				if (entry->effectvalue == 0)
+					if (channel->playing) {
+						it_playing_reset_resamplers(channel->playing, 0);
+						channel->playing->declick_stage = 0;
+					}
+				break;
+			case IT_TREMOLO:
+				{
+					unsigned char speed, depth;
+					if (sigdata->flags & IT_WAS_AN_S3M) {
+						unsigned char v = entry->effectvalue;
+						if (v == 0)
+							v = channel->lastDKL;
+						channel->lastDKL = v;
+						speed = v >> 4;
+						depth = v & 15;
+					} else {
+						speed = entry->effectvalue >> 4;
+						depth = entry->effectvalue & 15;
+						if (speed == 0)
+							speed = channel->lastRspeed;
+						channel->lastRspeed = speed;
+						if (depth == 0)
+							depth = channel->lastRdepth;
+						channel->lastRdepth = depth;
+					}
+					for (i = -1; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+						if (i < 0) playing = channel->playing;
+						else {
+							playing = sigrenderer->playing[i];
+							if (!playing || playing->channel != channel) continue;
+						}
+						if (playing) {
+							playing->tremolo_speed = speed;
+							playing->tremolo_depth = depth;
+						}
+					}
+				}
+				break;
+			case IT_S:
+				{
+					/* channel->lastS was set in update_pattern_variables(). */
+					unsigned char effectvalue = channel->lastS;
+					switch (effectvalue >> 4) {
+						//case IT_S_SET_FILTER:
+							/* Waveforms for commands S3x, S4x and S5x:
+							 *   0: Sine wave
+							 *   1: Ramp down
+							 *   2: Square wave
+							 *   3: Random wave
+							 */
+						case IT_S_SET_GLISSANDO_CONTROL:
+							channel->glissando = effectvalue & 15;
+							break;
+
+						case IT_S_FINETUNE:
+							if (channel->playing) {
+								channel->playing->finetune = ((int)(effectvalue & 15) - 8) << 5;
+							}
+							break;
+
+						case IT_S_SET_VIBRATO_WAVEFORM:
+							{
+								int waveform = effectvalue & 3;
+								if (sigdata->flags & IT_WAS_A_MOD) waveform = mod_convert_vibrato[waveform];
+								else if (sigdata->flags & IT_WAS_AN_XM) waveform = xm_convert_vibrato[waveform];
+								channel->vibrato_waveform = waveform;
+								if (channel->playing) {
+									channel->playing->vibrato_waveform = waveform;
+									if (!(effectvalue & 4))
+										channel->playing->vibrato_time = 0;
+								}
+							}
+							break;
+						case IT_S_SET_TREMOLO_WAVEFORM:
+							{
+								int waveform = effectvalue & 3;
+								if (sigdata->flags & IT_WAS_A_MOD) waveform = mod_convert_vibrato[waveform];
+								else if (sigdata->flags & IT_WAS_AN_XM) waveform = xm_convert_vibrato[waveform];
+								channel->tremolo_waveform = waveform;
+								if (channel->playing) {
+									channel->playing->tremolo_waveform = waveform;
+									if (!(effectvalue & 4))
+										channel->playing->tremolo_time = 0;
+								}
+							}
+							break;
+						case IT_S_SET_PANBRELLO_WAVEFORM:
+							channel->panbrello_waveform = effectvalue & 3;
+							if (channel->playing) {
+								channel->playing->panbrello_waveform = effectvalue & 3;
+								if (!(effectvalue & 4))
+									channel->playing->panbrello_time = 0;
+							}
+							break;
+
+						case IT_S_FINE_PATTERN_DELAY:
+							sigrenderer->tick += effectvalue & 15;
+							break;
+#if 1
+						case IT_S7:
+							{
+								if (sigrenderer->sigdata->flags & IT_USE_INSTRUMENTS)
+								{
+									int i;
+									switch (effectvalue & 15)
+									{
+									case 0: /* cut background notes */
+										for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++)
+										{
+											IT_PLAYING * playing = sigrenderer->playing[i];
+											if (playing && channel == playing->channel)
+											{
+												playing->declick_stage = 3;
+												if (channel->playing == playing) channel->playing = NULL;
+											}
+										}
+										break;
+									case 1: /* release background notes */
+										for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++)
+										{
+											IT_PLAYING * playing = sigrenderer->playing[i];
+											if (playing && channel == playing->channel && !(playing->flags & IT_PLAYING_SUSTAINOFF))
+											{
+												it_note_off(playing);
+											}
+										}
+										break;
+									case 2: /* fade background notes */
+										for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++)
+										{
+											IT_PLAYING * playing = sigrenderer->playing[i];
+											if (playing && channel == playing->channel)
+											{
+												//playing->flags &= IT_PLAYING_SUSTAINOFF;
+												playing->flags |= IT_PLAYING_FADING;
+											}
+										}
+										break;
+									case 3:
+										channel->new_note_action = NNA_NOTE_CUT;
+										break;
+									case 4:
+										channel->new_note_action = NNA_NOTE_CONTINUE;
+										break;
+									case 5:
+										channel->new_note_action = NNA_NOTE_OFF;
+										break;
+									case 6:
+										channel->new_note_action = NNA_NOTE_FADE;
+										break;
+
+									case 7:
+										if (channel->playing)
+											channel->playing->enabled_envelopes &= ~IT_ENV_VOLUME;
+										break;
+									case 8:
+										if (channel->playing)
+											channel->playing->enabled_envelopes |= IT_ENV_VOLUME;
+										break;
+
+									case 9:
+										if (channel->playing)
+											channel->playing->enabled_envelopes &= ~IT_ENV_PANNING;
+										break;
+									case 10:
+										if (channel->playing)
+											channel->playing->enabled_envelopes |= IT_ENV_PANNING;
+										break;
+
+									case 11:
+										if (channel->playing)
+											channel->playing->enabled_envelopes &= ~IT_ENV_PITCH;
+										break;
+									case 12:
+										if (channel->playing)
+											channel->playing->enabled_envelopes |= IT_ENV_PITCH;
+										break;
+									}
+								}
+							}
+							break;
+#endif
+						case IT_S_SET_PAN:
+							//ASSERT(!(sigdata->flags & IT_WAS_AN_XM));
+							channel->pan =
+								((effectvalue & 15) << 2) |
+								((effectvalue & 15) >> 2);
+							channel->truepan = channel->pan << IT_ENVELOPE_SHIFT;
+
+							if (channel->playing)
+								channel->playing->panbrello_depth = 0;
+							break;
+						case IT_S_SET_SURROUND_SOUND:
+							if ((effectvalue & 15) == 15) {
+								if (channel->playing && channel->playing->sample &&
+									!(channel->playing->sample->flags & (IT_SAMPLE_LOOP | IT_SAMPLE_SUS_LOOP))) {
+									channel->playing->flags |= IT_PLAYING_REVERSE;
+									it_playing_reset_resamplers( channel->playing, channel->playing->sample->length - 1 );
+								}
+							} else if ((effectvalue & 15) == 1) {
+								channel->pan = IT_SURROUND;
+								channel->truepan = channel->pan << IT_ENVELOPE_SHIFT;
+							}
+							if (channel->playing)
+								channel->playing->panbrello_depth = 0;
+							break;
+						case IT_S_SET_HIGH_OFFSET:
+							channel->high_offset = effectvalue & 15;
+							break;
+						//case IT_S_PATTERN_LOOP:
+						case IT_S_DELAYED_NOTE_CUT:
+							channel->note_cut_count = effectvalue & 15;
+							if (!channel->note_cut_count) {
+								if (sigdata->flags & (IT_WAS_AN_XM | IT_WAS_A_PTM))
+									channel->volume = 0;
+								else
+									channel->note_cut_count = 1;
+							}
+							break;
+						case IT_S_SET_MIDI_MACRO:
+							if ((sigdata->flags & (IT_WAS_AN_XM | IT_WAS_A_MOD)) == (IT_WAS_AN_XM | IT_WAS_A_MOD)) {
+								channel->inv_loop_speed = effectvalue & 15;
+								update_invert_loop(channel, channel->playing ? channel->playing->sample : NULL);
+							} else channel->SFmacro = effectvalue & 15;
+							break;
+					}
+				}
+				break;
+			case IT_SET_SONG_TEMPO:
+				{
+					unsigned char v = entry->effectvalue;
+					if (v == 0)
+						v = channel->lastW;
+					channel->lastW = v;
+					if (v < 0x10)
+						sigrenderer->temposlide = -v;
+					else if (v < 0x20)
+						sigrenderer->temposlide = v & 15;
+					else
+						sigrenderer->tempo = v;
+				}
+				break;
+			case IT_FINE_VIBRATO:
+				{
+					unsigned char speed = entry->effectvalue >> 4;
+					unsigned char depth = entry->effectvalue & 15;
+					if (speed == 0)
+						speed = channel->lastHspeed;
+					channel->lastHspeed = speed;
+					if (depth == 0)
+						depth = channel->lastHdepth;
+					else {
+						if (sigdata->flags & IT_OLD_EFFECTS)
+							depth <<= 1;
+						channel->lastHdepth = depth;
+					}
+					for (i = -1; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+						if (i < 0) playing = channel->playing;
+						else {
+							playing = sigrenderer->playing[i];
+							if (!playing || playing->channel != channel) continue;
+						}
+						if (playing) {
+							playing->vibrato_speed = speed;
+							playing->vibrato_depth = depth;
+							playing->vibrato_n++;
+						}
+					}
+				}
+				break;
+			case IT_SET_GLOBAL_VOLUME:
+				if ((sigdata->flags & IT_WAS_AN_S3M) && (entry->effectvalue > 64))
+					break;
+				if (entry->effectvalue <= 128)
+					sigrenderer->globalvolume = entry->effectvalue;
+#ifdef VOLUME_OUT_OF_RANGE_SETS_MAXIMUM
+				else
+					sigrenderer->globalvolume = 128;
+#endif
+				break;
+			case IT_GLOBAL_VOLUME_SLIDE:
+				{
+					unsigned char v = entry->effectvalue;
+					if (v == 0)
+						v = channel->lastW;
+					channel->lastW = v;
+					if ((v & 0x0F) == 0) { /* Wx0 */
+						sigrenderer->globalvolslide =
+							(sigdata->flags & IT_WAS_AN_XM) ? (v >> 4)*2 : (v >> 4);
+					} else if ((v & 0xF0) == 0) { /* W0x */
+						sigrenderer->globalvolslide =
+							(sigdata->flags & IT_WAS_AN_XM) ? (-v)*2 : (-v);
+					} else if ((v & 0x0F) == 0x0F) { /* WxF */
+						sigrenderer->globalvolume += v >> 4;
+						if (sigrenderer->globalvolume > 128) sigrenderer->globalvolume = 128;
+					} else if ((v & 0xF0) == 0xF0) { /* WFx */
+						sigrenderer->globalvolume -= v & 15;
+						if (sigrenderer->globalvolume > 128) sigrenderer->globalvolume = 0;
+					}
+				}
+				break;
+			case IT_SET_PANNING:
+				if (sigdata->flags & IT_WAS_AN_XM) {
+					channel->truepan = 32 + entry->effectvalue*64;
+				} else {
+					if (sigdata->flags & IT_WAS_AN_S3M)
+						channel->pan = (entry->effectvalue + 1) >> 1;
+					else
+						channel->pan = (entry->effectvalue + 2) >> 2;
+					channel->truepan = channel->pan << IT_ENVELOPE_SHIFT;
+				}
+				if (channel->playing)
+					channel->playing->panbrello_depth = 0;
+				break;
+			case IT_PANBRELLO:
+				{
+					unsigned char speed = entry->effectvalue >> 4;
+					unsigned char depth = entry->effectvalue & 15;
+					if (speed == 0)
+						speed = channel->lastYspeed;
+					channel->lastYspeed = speed;
+					if (depth == 0)
+						depth = channel->lastYdepth;
+					channel->lastYdepth = depth;
+					if (channel->playing) {
+						channel->playing->panbrello_speed = speed;
+						channel->playing->panbrello_depth = depth;
+					}
+				}
+				break;
+			case IT_MIDI_MACRO:
+				{
+					const IT_MIDI *midi = sigdata->midi ? sigdata->midi : &default_midi;
+					if (entry->effectvalue >= 0x80) {
+						int n = midi->Zmacrolen[entry->effectvalue-0x80];
+						int i;
+						for (i = 0; i < n; i++)
+							it_send_midi(sigrenderer, channel, midi->Zmacro[entry->effectvalue-0x80][i]);
+					} else {
+						int n = midi->SFmacrolen[channel->SFmacro];
+						int i, j;
+						for (i = 0, j = 1; i < n; i++, j <<= 1)
+							it_send_midi(sigrenderer, channel,
+								(unsigned char)(midi->SFmacroz[channel->SFmacro] & j ?
+									entry->effectvalue : midi->SFmacro[channel->SFmacro][i]));
+					}
+				}
+				break;
+			case IT_XM_SET_ENVELOPE_POSITION:
+				if (channel->playing && channel->playing->env_instrument) {
+					IT_ENVELOPE *envelope = &channel->playing->env_instrument->volume_envelope;
+					if (envelope->flags & IT_ENVELOPE_ON) {
+						IT_PLAYING_ENVELOPE *pe = &channel->playing->volume_envelope;
+						pe->tick = entry->effectvalue;
+						if (pe->tick >= envelope->node_t[envelope->n_nodes-1])
+							pe->tick = envelope->node_t[envelope->n_nodes-1];
+						pe->next_node = 0;
+						while (pe->tick > envelope->node_t[pe->next_node]) pe->next_node++;
+						xm_envelope_calculate_value(envelope, pe);
+					}
+				}
+				break;
+
+			/* uggly plain portamento for now */
+			case IT_PTM_NOTE_SLIDE_DOWN:
+			case IT_PTM_NOTE_SLIDE_DOWN_RETRIG:
+				{
+					channel->toneslide_retrig = (entry->effect == IT_PTM_NOTE_SLIDE_DOWN_RETRIG);
+
+					if (channel->ptm_last_toneslide) {
+						channel->toneslide_tick = channel->last_toneslide_tick;
+
+						if (--channel->toneslide_tick == 0) {
+							channel->truenote += channel->toneslide;
+							if (channel->truenote >= 120) {
+								if (channel->toneslide < 0) channel->truenote = 0;
+								else channel->truenote = 119;
+							}
+							channel->note += channel->toneslide;
+							if (channel->note >= 120) {
+								if (channel->toneslide < 0) channel->note = 0;
+								else channel->note = 119;
+							}
+
+							if (channel->playing) {
+								if (channel->sample) channel->playing->note = channel->truenote;
+								else channel->playing->note = channel->note;
+								it_playing_reset_resamplers(channel->playing, 0);
+								channel->playing->declick_stage = 0;
+							}
+						}
+					}
+
+					channel->ptm_last_toneslide = 0;
+
+					channel->toneslide = -(entry->effectvalue & 15);
+					channel->ptm_toneslide = (entry->effectvalue & 0xF0) >> 4;
+					channel->toneslide_tick += channel->ptm_toneslide;
+				}
+				break;
+			case IT_PTM_NOTE_SLIDE_UP:
+			case IT_PTM_NOTE_SLIDE_UP_RETRIG:
+				{
+					channel->toneslide_retrig = (entry->effect == IT_PTM_NOTE_SLIDE_UP_RETRIG);
+
+					if (channel->ptm_last_toneslide) {
+						channel->toneslide_tick = channel->last_toneslide_tick;
+
+						if (--channel->toneslide_tick == 0) {
+							channel->truenote += channel->toneslide;
+							if (channel->truenote >= 120) {
+								if (channel->toneslide < 0) channel->truenote = 0;
+								else channel->truenote = 119;
+							}
+							channel->note += channel->toneslide;
+							if (channel->note >= 120) {
+								if (channel->toneslide < 0) channel->note = 0;
+								else channel->note = 119;
+							}
+
+							if (channel->playing) {
+								if (channel->sample) channel->playing->note = channel->truenote;
+								else channel->playing->note = channel->note;
+								it_playing_reset_resamplers(channel->playing, 0);
+								channel->playing->declick_stage = 0;
+							}
+						}
+					}
+
+					channel->ptm_last_toneslide = 0;
+
+					channel->toneslide = -(entry->effectvalue & 15);
+					channel->ptm_toneslide = (entry->effectvalue & 0xF0) >> 4;
+					channel->toneslide_tick += channel->ptm_toneslide;
+				}
+				break;
+
+			case IT_OKT_NOTE_SLIDE_DOWN:
+			case IT_OKT_NOTE_SLIDE_DOWN_ROW:
+				channel->toneslide = -entry->effectvalue;
+				channel->okt_toneslide = (entry->effect == IT_OKT_NOTE_SLIDE_DOWN) ? 255 : 1;
+				break;
+
+			case IT_OKT_NOTE_SLIDE_UP:
+			case IT_OKT_NOTE_SLIDE_UP_ROW:
+				channel->toneslide = entry->effectvalue;
+				channel->okt_toneslide = (entry->effect == IT_OKT_NOTE_SLIDE_UP) ? 255 : 1;
+				break;
+
+			case IT_OKT_ARPEGGIO_3:
+			case IT_OKT_ARPEGGIO_4:
+			case IT_OKT_ARPEGGIO_5:
+				{
+					channel->arpeggio_offsets[0] = 0;
+					channel->arpeggio_offsets[1] = -(entry->effectvalue >> 4);
+					channel->arpeggio_offsets[2] = entry->effectvalue & 0x0F;
+
+					switch (entry->effect)
+					{
+					case IT_OKT_ARPEGGIO_3:
+						channel->arpeggio_table = (const unsigned char *)&arpeggio_okt_3;
+						break;
+
+					case IT_OKT_ARPEGGIO_4:
+						channel->arpeggio_table = (const unsigned char *)&arpeggio_okt_4;
+						break;
+
+					case IT_OKT_ARPEGGIO_5:
+						channel->arpeggio_table = (const unsigned char *)&arpeggio_okt_5;
+						break;
+					}
+				}
+				break;
+
+			case IT_OKT_VOLUME_SLIDE_DOWN:
+				if ( entry->effectvalue <= 16 ) channel->volslide = -entry->effectvalue;
+				else
+				{
+					channel->volume -= entry->effectvalue - 16;
+					if (channel->volume > 64) channel->volume = 0;
+				}
+				break;
+
+			case IT_OKT_VOLUME_SLIDE_UP:
+				if ( entry->effectvalue <= 16 ) channel->volslide = entry->effectvalue;
+				else
+				{
+					channel->volume += entry->effectvalue - 16;
+					if (channel->volume > 64) channel->volume = 64;
+				}
+				break;
+		}
+	}
+
+	if (!(sigdata->flags & IT_WAS_AN_XM))
+		post_process_it_volpan(sigrenderer, entry);
+
+	return 0;
+}
+
+
+
+static int process_it_note_data(DUMB_IT_SIGRENDERER *sigrenderer, IT_ENTRY *entry)
+{
+	DUMB_IT_SIGDATA *sigdata = sigrenderer->sigdata;
+	IT_CHANNEL *channel = &sigrenderer->channel[(int)entry->channel];
+	// Applies the note, instrument, tone-portamento and volume-column parts of one pattern entry to its channel. Always returns 0.
+	// When tone portamento and instrument are specified:
+	// If Gxx is off:
+	//   - same sample, do nothing but portamento
+	//   - diff sample, retrigger all but keep current note+slide + do porta
+	//   - if instrument is invalid, nothing; if sample is invalid, cut
+	// If Gxx is on:
+	//   - same sample or new sample invalid, retrigger envelopes and initialise note value for portamento to 'seek' to
+	//   - diff sample/inst, start using new envelopes
+	// When tone portamento is specified alone, sample won't change.
+	// TODO: consider what happens with instrument alone after all this...
+	/* Latch a new instrument number if one is given, then call instrument_to_sample() (defined elsewhere; presumably refreshes channel->sample - confirm). */
+	if (entry->mask & (IT_ENTRY_NOTE | IT_ENTRY_INSTRUMENT)) {
+		if (entry->mask & IT_ENTRY_INSTRUMENT)
+			channel->instrument = entry->instrument;
+		instrument_to_sample(sigdata, channel);
+		if (channel->note <= 120) {
+			if ((sigdata->flags & IT_USE_INSTRUMENTS) && channel->sample == 0)
+				it_retrigger_note(sigrenderer, channel); /* Stop the note */ /*return 1;*/
+			if (entry->mask & IT_ENTRY_INSTRUMENT)
+				get_default_volpan(sigdata, channel);
+		} else
+			it_retrigger_note(sigrenderer, channel); /* Stop the note */
+	}
+
+	/** WARNING: This is not ideal, since channel->playing might not get allocated owing to lack of memory... */
+	if (((entry->mask & IT_ENTRY_VOLPAN) && entry->volpan >= 193 && entry->volpan <= 202) ||
+	    ((entry->mask & IT_ENTRY_EFFECT) && (entry->effect == IT_TONE_PORTAMENTO || entry->effect == IT_VOLSLIDE_TONEPORTA)))
+	{
+		if (channel->playing && (entry->mask & IT_ENTRY_INSTRUMENT)) {
+			if (sigdata->flags & IT_COMPATIBLE_GXX)
+				it_compatible_gxx_retrigger(sigdata, channel);
+			else if ((!(sigdata->flags & IT_USE_INSTRUMENTS) ||
+				(channel->instrument >= 1 && channel->instrument <= sigdata->n_instruments)) &&
+				channel->sample != channel->playing->sampnum)
+			{
+				unsigned char note = channel->playing->note; /* saved: the retriggered voice keeps the current note... */
+				int slide = channel->playing->slide; /* ...and the current slide, see "diff sample" above */
+				it_retrigger_note(sigrenderer, channel);
+				if (channel->playing) {
+					channel->playing->note = note;
+					channel->playing->slide = slide;
+					// Should we be preserving sample_vibrato_time? depth?
+				}
+			}
+		}
+		/* The portamento speed is rebuilt from zero; the volume column and the effect column below each add into it. */
+		channel->toneporta = 0;
+
+		if ((entry->mask & IT_ENTRY_VOLPAN) && entry->volpan >= 193 && entry->volpan <= 202) {
+			/* Tone Portamento in the volume column */
+			static const unsigned char slidetable[] = {0, 1, 4, 8, 16, 32, 64, 96, 128, 255};
+			unsigned char v = slidetable[entry->volpan - 193]; /* index is 0..9 thanks to the range check above */
+			if (sigdata->flags & IT_COMPATIBLE_GXX) { /* a zero speed reuses the remembered one: lastG here, lastEF otherwise */
+				if (v == 0)
+					v = channel->lastG;
+				channel->lastG = v;
+			} else {
+				if (v == 0)
+					v = channel->lastEF;
+				channel->lastEF = v;
+			}
+			channel->toneporta += v << 4;
+		}
+
+		if ((entry->mask & IT_ENTRY_EFFECT) && (entry->effect == IT_TONE_PORTAMENTO || entry->effect == IT_VOLSLIDE_TONEPORTA)) {
+			/* Tone Portamento in the effect column */
+			unsigned char v;
+			if (entry->effect == IT_TONE_PORTAMENTO)
+				v = entry->effectvalue;
+			else
+				v = 0; /* IT_VOLSLIDE_TONEPORTA: effectvalue is not used as a speed here, so the memory logic below applies */
+			if (sigdata->flags & IT_COMPATIBLE_GXX) {
+				if (v == 0)
+					v = channel->lastG;
+				channel->lastG = v;
+			} else {
+				if (v == 0 && !(sigdata->flags & IT_WAS_A_669)) /* with IT_WAS_A_669 set, a zero speed stays zero and is stored as such */
+					v = channel->lastEF;
+				channel->lastEF = v;
+			}
+			channel->toneporta += v << 4;
+		}
+		/* Pick the portamento target: truenote when the channel has a sample, otherwise the raw note. */
+		if ((entry->mask & IT_ENTRY_NOTE) || ((sigdata->flags & IT_COMPATIBLE_GXX) && (entry->mask & IT_ENTRY_INSTRUMENT))) {
+			if (channel->note <= 120) {
+				if (channel->sample)
+					channel->destnote = channel->truenote;
+				else
+					channel->destnote = channel->note;
+			}
+		}
+		/* A voice is already playing on this channel: skip the note start and go straight to the volume column. */
+		if (channel->playing) goto skip_start_note;
+	}
+	/* Start a note when a note is given, or when an instrument is given and nothing is playing or it differs from the playing one. */
+	if ((entry->mask & IT_ENTRY_NOTE) ||
+		((entry->mask & IT_ENTRY_INSTRUMENT) && (!channel->playing || entry->instrument != channel->playing->instnum)))
+	{
+		if (channel->note <= 120) {
+			get_true_pan(sigdata, channel);
+			if ((entry->mask & IT_ENTRY_NOTE) || !(sigdata->flags & (IT_WAS_AN_S3M|IT_WAS_A_PTM))) /* instrument-only entries do not retrigger when IT_WAS_AN_S3M or IT_WAS_A_PTM is set */
+				it_retrigger_note(sigrenderer, channel);
+		}
+	}
+
+	skip_start_note:
+	/* Volume column: only set-volume, the fine volume slides and set-pan change state here; the other ranges are left untouched by this function. */
+	if (entry->mask & IT_ENTRY_VOLPAN) {
+		if (entry->volpan <= 64) {
+			/* Volume */
+			channel->volume = entry->volpan;
+		} else if (entry->volpan <= 74) {
+			/* Fine volume slide up */
+			unsigned char v = entry->volpan - 65;
+			if (v == 0)
+				v = channel->lastvolslide;
+			channel->lastvolslide = v;
+			/* = effect DxF where x == entry->volpan - 65 */
+			channel->volume += v;
+			if (channel->volume > 64) channel->volume = 64;
+		} else if (entry->volpan <= 84) {
+			/* Fine volume slide down */
+			unsigned char v = entry->volpan - 75;
+			if (v == 0)
+				v = channel->lastvolslide;
+			channel->lastvolslide = v;
+			/* = effect DFx where x == entry->volpan - 75 */
+			channel->volume -= v;
+			if (channel->volume > 64) channel->volume = 0; /* presumably catches unsigned wrap-around below zero; channel->volume's type is declared elsewhere - confirm */
+		} else if (entry->volpan < 128) {
+			/* Volume slide up */
+			/* Volume slide down */
+			/* Portamento down */
+			/* Portamento up */
+		} else if (entry->volpan <= 192) {
+			/* Pan */
+			channel->pan = entry->volpan - 128; /* volpan 128..192 becomes pan 0..64 */
+			channel->truepan = channel->pan << IT_ENVELOPE_SHIFT;
+		}
+		/* else */
+		/* Tone Portamento */
+		/* Vibrato */
+	}
+	return 0;
+}
+
+
+
+static void retrigger_xm_envelopes(IT_PLAYING *playing)
+{
+	/* Rewind the volume and pan envelopes to node 0 / tick -1 and put
+	 * the fadeout counter back to 1024. */
+	playing->pan_envelope.next_node = playing->volume_envelope.next_node = 0;
+	playing->pan_envelope.tick = playing->volume_envelope.tick = -1;
+	playing->fadeoutcount = 1024;
+}
+
+
+
+static void process_xm_note_data(DUMB_IT_SIGRENDERER *sigrenderer, IT_ENTRY *entry)
+{
+	DUMB_IT_SIGDATA *sigdata = sigrenderer->sigdata;
+	IT_CHANNEL *channel = &sigrenderer->channel[(int)entry->channel];
+	IT_PLAYING * playing = NULL;
+
+	if (entry->mask & IT_ENTRY_INSTRUMENT) {
+		int oldsample = channel->sample;
+		channel->inv_loop_offset = 0;
+		channel->instrument = entry->instrument;
+		instrument_to_sample(sigdata, channel);
+		if (channel->playing &&
+			!((entry->mask & IT_ENTRY_NOTE) && entry->note >= 120) &&
+			!((entry->mask & IT_ENTRY_EFFECT) && entry->effect == IT_XM_KEY_OFF && entry->effectvalue == 0)) {
+			playing = dup_playing(channel->playing, channel, channel);
+			if (!playing) return;
+			if (!(sigdata->flags & IT_WAS_A_MOD)) {
+				/* Retrigger vol/pan envelopes if enabled, and cancel fadeout.
+				 * Also reset vol/pan to that of _original_ instrument.
+				 */
+				channel->playing->flags &= ~(IT_PLAYING_SUSTAINOFF | IT_PLAYING_FADING);
+				it_playing_update_resamplers(channel->playing);
+
+				channel->volume = channel->playing->sample->default_volume;
+				channel->truepan = 32 + channel->playing->sample->default_pan*64;
+
+				retrigger_xm_envelopes(channel->playing);
+			} else {
+				/* Switch if sample changed */
+				if (oldsample != channel->sample) {
+					int i;
+					for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+						if (!sigrenderer->playing[i]) {
+							channel->playing->declick_stage = 3;
+							sigrenderer->playing[i] = channel->playing;
+							channel->playing = NULL;
+							break;
+						}
+					}
+
+					if (!channel->sample) {
+						if (channel->playing)
+						{
+							free_playing(sigrenderer, channel->playing);
+							channel->playing = NULL;
+						}
+					} else {
+						if (channel->playing) {
+							free_playing(sigrenderer, channel->playing);
+						}
+						channel->playing = playing;
+						playing = NULL;
+						channel->playing->declick_stage = 0;
+						channel->playing->sampnum = channel->sample;
+						channel->playing->sample = &sigdata->sample[channel->sample-1];
+						it_playing_reset_resamplers(channel->playing, 0);
+					}
+				}
+				get_default_volpan(sigdata, channel);
+			}
+		}
+	}
+
+	if (!((entry->mask & IT_ENTRY_EFFECT) && entry->effect == IT_XM_KEY_OFF && entry->effectvalue == 0) &&
+		(entry->mask & IT_ENTRY_NOTE))
+	{
+		if (!(entry->mask & IT_ENTRY_INSTRUMENT))
+			instrument_to_sample(sigdata, channel);
+
+		if (channel->note >= 120)
+			xm_note_off(sigdata, channel);
+		else if (channel->sample == 0) {
+			/** If we get here, one of the following is the case:
+			 ** 1. The instrument has never been specified on this channel.
+			 ** 2. The specified instrument is invalid.
+			 ** 3. The instrument has no sample mapped to the selected note.
+			 ** What should happen?
+			 **
+			 ** Experimentation shows that any existing note stops and cannot
+			 ** be brought back. A subsequent instrument change fixes that.
+			 **/
+			if (channel->playing) {
+				int i;
+				if (playing) {
+					free_playing(sigrenderer, channel->playing);
+					channel->playing = playing;
+					playing = NULL;
+				}
+				for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+					if (!sigrenderer->playing[i]) {
+						channel->playing->declick_stage = 3;
+						sigrenderer->playing[i] = channel->playing;
+						channel->playing = NULL;
+						break;
+					}
+				}
+				if (channel->playing) {
+					free_playing(sigrenderer, channel->playing);
+					channel->playing = NULL;
+				}
+			}
+			if (playing) free_playing(sigrenderer, playing);
+			return;
+		} else if (channel->playing && (entry->mask & IT_ENTRY_VOLPAN) && ((entry->volpan>>4) == 0xF)) {
+			/* Don't retrigger note; portamento in the volume column. */
+		} else if (channel->playing &&
+		           (entry->mask & IT_ENTRY_EFFECT) &&
+		           (entry->effect == IT_TONE_PORTAMENTO ||
+		            entry->effect == IT_VOLSLIDE_TONEPORTA)) {
+			/* Don't retrigger note; portamento in the effects column. */
+		} else {
+			channel->destnote = IT_NOTE_OFF;
+
+			if (!channel->playing) {
+				channel->playing = new_playing(sigrenderer);
+				if (!channel->playing) {
+					if (playing) free_playing(sigrenderer, playing);
+					return;
+				}
+				// Adding the following seems to do the trick for the case where a piece starts with an instrument alone and then some notes alone.
+				retrigger_xm_envelopes(channel->playing);
+			}
+			else if (playing) {
+				/* volume rampy stuff! move note to NNA */
+				int i;
+				IT_PLAYING * ptemp;
+				if (playing->sample) ptemp = playing;
+				else ptemp = channel->playing;
+				if (!ptemp) {
+					if (playing) free_playing(sigrenderer, playing);
+					return;
+				}
+				playing = NULL;
+				for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+					if (!sigrenderer->playing[i]) {
+						ptemp->declick_stage = 3;
+						ptemp->flags |= IT_PLAYING_SUSTAINOFF | IT_PLAYING_FADING;
+						sigrenderer->playing[i] = ptemp;
+						ptemp = NULL;
+						break;
+					}
+				}
+				if (ptemp) free_playing(sigrenderer, ptemp);
+			}
+
+			channel->playing->flags = 0;
+			channel->playing->resampling_quality = sigrenderer->resampling_quality;
+			channel->playing->channel = channel;
+			channel->playing->sample = &sigdata->sample[channel->sample-1];
+			if (sigdata->flags & IT_USE_INSTRUMENTS)
+				channel->playing->instrument = &sigdata->instrument[channel->instrument-1];
+			else
+				channel->playing->instrument = NULL;
+			channel->playing->env_instrument = channel->playing->instrument;
+			channel->playing->sampnum = channel->sample;
+			channel->playing->instnum = channel->instrument;
+			channel->playing->declick_stage = 0;
+			channel->playing->channel_volume = channel->channelvolume;
+			channel->playing->note = channel->truenote;
+			channel->playing->enabled_envelopes = 0;
+			channel->playing->volume_offset = 0;
+			channel->playing->panning_offset = 0;
+			//channel->playing->output = channel->output;
+			if (sigdata->flags & IT_USE_INSTRUMENTS) {
+				IT_PLAYING * playing = channel->playing;
+				IT_INSTRUMENT * instrument = playing->instrument;
+				if (instrument->volume_envelope.flags & IT_ENVELOPE_ON) playing->enabled_envelopes |= IT_ENV_VOLUME;
+				if (instrument->pan_envelope.flags & IT_ENVELOPE_ON) playing->enabled_envelopes |= IT_ENV_PANNING;
+				//if (instrument->output) playing->output = instrument->output;
+			}
+			channel->playing->filter_cutoff = 127;
+			channel->playing->filter_resonance = 0;
+			channel->playing->true_filter_cutoff = 127 << 8;
+			channel->playing->true_filter_resonance = 0;
+			channel->playing->vibrato_speed = 0;
+			channel->playing->vibrato_depth = 0;
+			channel->playing->vibrato_n = 0;
+			channel->playing->vibrato_time = 0;
+			channel->playing->vibrato_waveform = 0;
+			channel->playing->tremolo_speed = 0;
+			channel->playing->tremolo_depth = 0;
+			channel->playing->tremolo_time = 0;
+			channel->playing->tremolo_waveform = 0;
+			channel->playing->panbrello_speed = 0;
+			channel->playing->panbrello_depth = 0;
+			channel->playing->panbrello_time = 0;
+			channel->playing->panbrello_waveform = 0;
+			channel->playing->panbrello_random = 0;
+			channel->playing->sample_vibrato_time = 0;
+			channel->playing->sample_vibrato_waveform = channel->playing->sample->vibrato_waveform;
+			channel->playing->sample_vibrato_depth = 0;
+			channel->playing->slide = 0;
+			channel->playing->finetune = channel->playing->sample->finetune;
+			it_reset_filter_state(&channel->playing->filter_state[0]); // Are these
+			it_reset_filter_state(&channel->playing->filter_state[1]); // necessary?
+			it_playing_reset_resamplers(channel->playing, 0);
+
+			/** WARNING - is everything initialised? */
+		}
+	}
+
+	if (!((entry->mask & IT_ENTRY_EFFECT) && entry->effect == IT_XM_KEY_OFF && entry->effectvalue == 0) &&
+		!((entry->mask & IT_ENTRY_NOTE) && entry->note >= 120) &&
+		(entry->mask & (IT_ENTRY_NOTE | IT_ENTRY_INSTRUMENT)) == (IT_ENTRY_NOTE | IT_ENTRY_INSTRUMENT))
+	{
+		if (channel->playing) retrigger_xm_envelopes(channel->playing);
+		get_default_volpan(sigdata, channel);
+	}
+
+	if ((entry->mask & IT_ENTRY_VOLPAN) && ((entry->volpan>>4) == 0xF)) {
+		/* Tone Portamento */
+		unsigned char v = (entry->volpan & 15) << 4;
+		if (v == 0)
+			v = channel->lastG;
+		channel->lastG = v;
+		if (entry->mask & IT_ENTRY_NOTE)
+			if (channel->sample && channel->note < 120)
+				channel->destnote = channel->truenote;
+		channel->toneporta = v << 4;
+	} else if ((entry->mask & IT_ENTRY_EFFECT) &&
+	           (entry->effect == IT_TONE_PORTAMENTO ||
+	            entry->effect == IT_VOLSLIDE_TONEPORTA)) {
+		unsigned char v;
+		if (entry->effect == IT_TONE_PORTAMENTO)
+			v = entry->effectvalue;
+		else
+			v = 0;
+		if (v == 0)
+			v = channel->lastG;
+		channel->lastG = v;
+		if (entry->mask & IT_ENTRY_NOTE)
+			if (channel->sample && channel->note < 120)
+				channel->destnote = channel->truenote;
+		channel->toneporta = v << 4;
+	}
+
+	if (entry->mask & IT_ENTRY_VOLPAN) {
+		int effect = entry->volpan >> 4;
+		int value  = entry->volpan & 15;
+		switch (effect) {
+			case 0x6: /* Volume slide down */
+				channel->xm_volslide = -value;
+				break;
+			case 0x7: /* Volume slide up */
+				channel->xm_volslide = value;
+				break;
+			case 0x8: /* Fine volume slide down */
+				channel->volume -= value;
+				if (channel->volume > 64) channel->volume = 0;
+				break;
+			case 0x9: /* Fine volume slide up */
+				channel->volume += value;
+				if (channel->volume > 64) channel->volume = 64;
+				break;
+			case 0xA: /* Set vibrato speed */
+				if (value)
+					channel->lastHspeed = value;
+				if (channel->playing)
+					channel->playing->vibrato_speed = channel->lastHspeed;
+				break;
+			case 0xB: /* Vibrato */
+				if (value)
+					channel->lastHdepth = value << 2; /** WARNING: correct ? */
+				if (channel->playing) {
+					channel->playing->vibrato_depth = channel->lastHdepth;
+					channel->playing->vibrato_speed = channel->lastHspeed;
+					channel->playing->vibrato_n++;
+				}
+				break;
+			case 0xC: /* Set panning */
+				channel->truepan = 32 + value*(17*64);
+				break;
+			case 0xD: /* Pan slide left */
+				/* -128 is a special case for emulating a 'feature' in FT2.
+				 * As soon as effects are processed, it goes hard left.
+				 */
+				channel->panslide = value ? -value : -128;
+				break;
+			case 0xE: /* Pan slide Right */
+				channel->panslide = value;
+				break;
+			case 0xF: /* Tone porta */
+				break;
+			default:  /* Volume */
+				channel->volume = entry->volpan - 0x10;
+				break;
+		}
+	}
+
+	if (playing) free_playing(sigrenderer, playing);
+}
+
+/* Runs the format-specific note handler for one pattern entry (XM or IT,
+ * selected by IT_WAS_AN_XM) and then its effects via process_effects().
+ * This function assumes !IT_IS_END_ROW(entry). */
+static int process_note_data(DUMB_IT_SIGRENDERER *sigrenderer, IT_ENTRY *entry, int ignore_cxx)
+{
+	/* A nonzero result from the IT handler skips effect processing entirely. */
+	if (sigrenderer->sigdata->flags & IT_WAS_AN_XM) {
+		process_xm_note_data(sigrenderer, entry);
+	} else if (process_it_note_data(sigrenderer, entry)) {
+		return 0;
+	}
+
+	return process_effects(sigrenderer, entry, ignore_cxx);
+}
+
+/* Handles one pattern entry when a row is first played: latches the note, resets effects for 669 modules, */
+/* then either defers the entry (note delay; returns 0) or returns the result of process_note_data(). */
+static int process_entry(DUMB_IT_SIGRENDERER *sigrenderer, IT_ENTRY *entry, int ignore_cxx)
+{
+	IT_CHANNEL *channel = &sigrenderer->channel[(int)entry->channel];
+
+	if (entry->mask & IT_ENTRY_NOTE)
+		channel->note = entry->note;
+
+	if ((entry->mask & (IT_ENTRY_NOTE|IT_ENTRY_EFFECT)) && (sigrenderer->sigdata->flags & IT_WAS_A_669)) {
+		reset_channel_effects(channel);
+		// XXX unknown
+		if (channel->playing) channel->playing->finetune = 0;
+	}
+
+	if ((entry->mask & IT_ENTRY_EFFECT) && entry->effect == IT_S) {
+		/* channel->lastS was set in update_pattern_variables(). */
+		unsigned char effectvalue = channel->lastS;
+		if (effectvalue >> 4 == IT_S_NOTE_DELAY) {
+			channel->note_delay_count = effectvalue & 15; /* delay taken from the low nibble */
+			if (channel->note_delay_count == 0) /* a zero delay is treated as 1 */
+				channel->note_delay_count = 1;
+			channel->note_delay_entry = entry; /* replayed by update_tick_counts() when the count hits 0 */
+			return 0;
+		}
+	}
+
+	return process_note_data(sigrenderer, entry, ignore_cxx);
+}
+
+/* Counts down each channel's pending key-off, note-cut or note-delay counter and fires it on reaching 0. */
+/* Only the first nonzero counter per channel is advanced per call (else-if chain, in that order). */
+static void update_tick_counts(DUMB_IT_SIGRENDERER *sigrenderer)
+{
+	int i;
+
+	for (i = 0; i < DUMB_IT_N_CHANNELS; i++) {
+		IT_CHANNEL *channel = &sigrenderer->channel[i];
+
+		if (channel->key_off_count) {
+			channel->key_off_count--;
+			if (channel->key_off_count == 0)
+				xm_note_off(sigrenderer->sigdata, channel);
+		} else if (channel->note_cut_count) {
+			channel->note_cut_count--;
+			if (channel->note_cut_count == 0) {
+				if (sigrenderer->sigdata->flags & (IT_WAS_AN_XM | IT_WAS_A_PTM))
+					channel->volume = 0; /* XM/PTM: the cut only zeroes the channel volume */
+				else if (channel->playing) {
+					int i; /* NOTE: shadows the outer channel index; used only for the slot search below */
+					for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+						if (!sigrenderer->playing[i]) { /* free background slot: move the voice there */
+							channel->playing->declick_stage = 3; /* stage >= 3 makes playing_volume_setup() target zero volume */
+							sigrenderer->playing[i] = channel->playing;
+							channel->playing = NULL;
+							break;
+						}
+					}
+					if (channel->playing) { /* no free slot was found: drop the voice immediately */
+						free_playing(sigrenderer, channel->playing);
+						channel->playing = NULL;
+					}
+				}
+			}
+		} else if (channel->note_delay_count) {
+			channel->note_delay_count--;
+			if (channel->note_delay_count == 0)
+				process_note_data(sigrenderer, channel->note_delay_entry, 0);
+					/* Don't bother checking the return value; if the note
+					 * was delayed, there can't have been a speed=0.
+					 */
+		}
+	}
+}
+
+/* Returns the envelope's current value as cached in pe->value; 'envelope' is unused in the active branch. */
+/* The disabled #else branch instead interpolates the value linearly from the node tables at pe->tick. */
+static int envelope_get_y(IT_ENVELOPE *envelope, IT_PLAYING_ENVELOPE *pe)
+{
+#if 1
+	(void)envelope; //TODO: remove the parameter
+	return pe->value;
+#else
+	int ys, ye;
+	int ts, te;
+	int t;
+
+	if (pe->next_node <= 0)
+		return envelope->node_y[0] << IT_ENVELOPE_SHIFT;
+
+	if (pe->next_node >= envelope->n_nodes)
+		return envelope->node_y[envelope->n_nodes-1] << IT_ENVELOPE_SHIFT;
+
+	ys = envelope->node_y[pe->next_node-1] << IT_ENVELOPE_SHIFT;
+	ts = envelope->node_t[pe->next_node-1];
+	te = envelope->node_t[pe->next_node];
+
+	if (ts == te) /* coincident nodes: avoid dividing by zero below */
+		return ys;
+
+	ye = envelope->node_y[pe->next_node] << IT_ENVELOPE_SHIFT;
+
+	t = pe->tick;
+
+	return ys + (ye - ys) * (t - ts) / (te - ts);
+#endif
+}
+
+/* Compiled out (#if 0). Returns 1 once an IT envelope has run past its final node, */
+/* and 0 while nodes remain or an active loop / sustain loop still covers the current tick. */
+#if 0
+static int it_envelope_end(IT_PLAYING *playing, IT_ENVELOPE *envelope, IT_PLAYING_ENVELOPE *pe)
+{
+	if (pe->next_node >= envelope->n_nodes)
+		return 1;
+
+	if (pe->tick < envelope->node_t[pe->next_node]) return 0;
+
+	if ((envelope->flags & IT_ENVELOPE_LOOP_ON) &&
+	    envelope->loop_end >= pe->next_node &&
+	    envelope->node_t[envelope->loop_end] <= pe->tick) return 0;
+
+	if ((envelope->flags & IT_ENVELOPE_SUSTAIN_LOOP) &&
+	    !(playing->flags & IT_PLAYING_SUSTAINOFF) &&
+	    envelope->sus_loop_end >= pe->next_node &&
+	    envelope->node_t[envelope->sus_loop_end] <= pe->tick) return 0;
+
+	if (envelope->node_t[envelope->n_nodes-1] <= pe->tick) return 1;
+
+	return 0;
+}
+#endif
+
+/* Steps one IT envelope: computes pe->value for the current tick, increments pe->tick, then applies loop wrap-around. */
+/* Does nothing and returns 0 if the envelope is not enabled in playing->enabled_envelopes ('flags') or has no nodes. */
+/* Returns 1 when fading should be initiated for a volume envelope. */
+static int update_it_envelope(IT_PLAYING *playing, IT_ENVELOPE *envelope, IT_PLAYING_ENVELOPE *pe, int flags)
+{
+	if (!(playing->enabled_envelopes & flags) || !envelope->n_nodes)
+		return 0;
+
+	ASSERT(envelope->n_nodes > 0);
+
+	if (pe->tick <= 0) /* at or before the start: hold the first node's value */
+		pe->value = envelope->node_y[0] << IT_ENVELOPE_SHIFT;
+	else if (pe->tick >= envelope->node_t[envelope->n_nodes-1]) { /* at or past the last node: hold its value */
+		pe->value = envelope->node_y[envelope->n_nodes-1] << IT_ENVELOPE_SHIFT;
+	} else {
+		int ys = envelope->node_y[pe->next_node-1] << IT_ENVELOPE_SHIFT;
+		int ts = envelope->node_t[pe->next_node-1];
+		int te = envelope->node_t[pe->next_node];
+
+		if (ts == te) /* coincident nodes: avoid dividing by zero */
+			pe->value = ys;
+		else {
+			int ye = envelope->node_y[pe->next_node] << IT_ENVELOPE_SHIFT;
+			int t = pe->tick;
+
+			pe->value = ys + (ye - ys) * (t - ts) / (te - ts); /* linear interpolation between the two nodes */
+		}
+	}
+
+	pe->tick++;
+
+	recalculate_it_envelope_node(pe, envelope);
+
+	if ((envelope->flags & IT_ENVELOPE_SUSTAIN_LOOP) && !(playing->flags & IT_PLAYING_SUSTAINOFF)) {
+		if (pe->tick > envelope->node_t[envelope->sus_loop_end]) { /* ran past the sustain loop end: wrap to its start */
+			pe->next_node = envelope->sus_loop_start + 1;
+			ASSERT(pe->next_node <= envelope->n_nodes);
+			pe->tick = envelope->node_t[envelope->sus_loop_start];
+			return 0;
+		}
+	} else if (envelope->flags & IT_ENVELOPE_LOOP_ON) {
+		if (pe->tick > envelope->node_t[envelope->loop_end]) { /* ran past the loop end: wrap to its start */
+			pe->next_node = envelope->loop_start + 1;
+			ASSERT(pe->next_node <= envelope->n_nodes);
+			pe->tick = envelope->node_t[envelope->loop_start];
+			return 0;
+		}
+	}
+	else if (pe->tick > envelope->node_t[envelope->n_nodes - 1]) /* no loop active and past the last node */
+		return 1;
+
+	return 0;
+}
+
+/* Steps the volume, pan and pitch envelopes of an IT voice. When the volume
+ * envelope reports its end the voice starts fading, and dies at once if the envelope value is 0. */
+static void update_it_envelopes(IT_PLAYING *playing)
+{
+	int volume_ended = update_it_envelope(playing, &playing->env_instrument->volume_envelope,
+	                                      &playing->volume_envelope, IT_ENV_VOLUME);
+
+	if (volume_ended) {
+		if (playing->volume_envelope.value == 0)
+			playing->flags |= IT_PLAYING_FADING | IT_PLAYING_DEAD;
+		else
+			playing->flags |= IT_PLAYING_FADING;
+	}
+	update_it_envelope(playing, &playing->env_instrument->pan_envelope, &playing->pan_envelope, IT_ENV_PANNING);
+	update_it_envelope(playing, &playing->env_instrument->pitch_envelope, &playing->pitch_envelope, IT_ENV_PITCH);
+}
+
+/* Returns 1 while an XM envelope is held at its sustain node: sustain enabled, IT_PLAYING_SUSTAINOFF
+ * not set, sustain node index in range, and pe->tick exactly at that node's time. Otherwise 0. */
+static int xm_envelope_is_sustaining(IT_PLAYING *playing, IT_ENVELOPE *envelope, IT_PLAYING_ENVELOPE *pe)
+{
+	int holding = (envelope->flags & IT_ENVELOPE_SUSTAIN_LOOP) && !(playing->flags & IT_PLAYING_SUSTAINOFF);
+
+	if (!holding || envelope->sus_loop_start >= envelope->n_nodes)
+		return 0;
+	return pe->tick == envelope->node_t[envelope->sus_loop_start];
+}
+
+/* Advances an enabled XM envelope by one tick unless it is held at its sustain node or has reached its last node. */
+/* Keeps pe->next_node in step, applies the envelope loop, and recomputes the value via xm_envelope_calculate_value(). */
+static void update_xm_envelope(IT_PLAYING *playing, IT_ENVELOPE *envelope, IT_PLAYING_ENVELOPE *pe)
+{
+	if (!(envelope->flags & IT_ENVELOPE_ON))
+		return;
+
+	if (xm_envelope_is_sustaining(playing, envelope, pe))
+		return;
+
+	if (pe->tick >= envelope->node_t[envelope->n_nodes-1]) /* already at or past the final node: stay there */
+		return;
+
+	pe->tick++;
+
+	/* pe->next_node must be kept up to date for envelope_get_y(). */
+	while (pe->tick > envelope->node_t[pe->next_node])
+		pe->next_node++;
+
+	if ((envelope->flags & IT_ENVELOPE_LOOP_ON) && envelope->loop_end < envelope->n_nodes) {
+		if (pe->tick == envelope->node_t[envelope->loop_end]) { /* reached the loop end: jump back to the loop start */
+			pe->next_node = MID(0, envelope->loop_start, envelope->n_nodes - 1); /* NOTE(review): MID presumably clamps the index into range — confirm */
+			pe->tick = envelope->node_t[pe->next_node];
+		}
+	}
+
+	xm_envelope_calculate_value(envelope, pe);
+}
+
+/* Steps the volume and pan envelopes of an XM voice by one tick. */
+/* No pitch envelope is updated here, unlike update_it_envelopes(). */
+static void update_xm_envelopes(IT_PLAYING *playing)
+{
+	update_xm_envelope(playing, &playing->env_instrument->volume_envelope, &playing->volume_envelope);
+	update_xm_envelope(playing, &playing->env_instrument->pan_envelope, &playing->pan_envelope);
+}
+
+/* While a voice is fading, lowers its fadeoutcount by the instrument's fadeout rate, flooring at 0.
+ * On reaching 0 the voice is marked dead, except in XM modules where it is left alive. */
+static void update_fadeout(DUMB_IT_SIGDATA *sigdata, IT_PLAYING *playing)
+{
+	if (!(playing->flags & IT_PLAYING_FADING))
+		return;
+
+	playing->fadeoutcount -= playing->env_instrument->fadeout;
+	if (!(playing->fadeoutcount <= 0))
+		return;
+	playing->fadeoutcount = 0;
+	if (!(sigdata->flags & IT_WAS_AN_XM)) playing->flags |= IT_PLAYING_DEAD;
+}
+
+static int apply_pan_envelope(IT_PLAYING *playing);
+static float calculate_volume(DUMB_IT_SIGRENDERER *sigrenderer, IT_PLAYING *playing, double volume);
+/* Sets a voice's target volumes float_volume[0..1] from its pan and calculate_volume(), then its ramp state per ramp_style and declick_stage. */
+static void playing_volume_setup(DUMB_IT_SIGRENDERER * sigrenderer, IT_PLAYING * playing, float invt2g)
+{
+	DUMB_IT_SIGDATA * sigdata = sigrenderer->sigdata;
+	int pan;
+	float vol, span;
+    float rampScale;
+    int ramp_style = sigrenderer->ramp_style;
+ 
+	pan = apply_pan_envelope(playing);
+
+	if ((sigrenderer->n_channels >= 2) && (sigdata->flags & IT_STEREO) && (sigrenderer->n_channels != 3 || !IT_IS_SURROUND_SHIFTED(pan))) {
+		if (!IT_IS_SURROUND_SHIFTED(pan)) {
+			span = (pan - (32<<8)) * sigdata->pan_separation * (1.0f / ((32<<8) * 128)); /* offset from centre (32<<8), scaled by pan_separation */
+			vol = 0.5f * (1.0f - span);
+			playing->float_volume[0] = vol;
+			playing->float_volume[1] = 1.0f - vol; /* the two channel gains always sum to 1 */
+		} else {
+			playing->float_volume[0] = -0.5f; /* surround: equal magnitude, opposite sign */
+			playing->float_volume[1] = 0.5f;
+		}
+ 	} else {
+		playing->float_volume[0] = 1.0f;
+		playing->float_volume[1] = 1.0f;
+	}
+
+	vol = calculate_volume(sigrenderer, playing, 1.0f);
+	playing->float_volume[0] *= vol;
+	playing->float_volume[1] *= vol;
+    
+    rampScale = 4; /* default ramp rate; raised to 48 in the cases below */
+
+    if (ramp_style > 0 && playing->declick_stage == 2) {
+        if ((playing->ramp_volume[0] == 0 && playing->ramp_volume[1] == 0) || vol == 0) /* ramping from or to silence */
+            rampScale = 48;
+    }
+
+    if (ramp_style == 0 || (ramp_style < 2 && playing->declick_stage == 2)) { /* no ramping: jump straight to the target */
+		if (playing->declick_stage <= 2) {
+			playing->ramp_volume[0] = playing->float_volume[0];
+			playing->ramp_volume[1] = playing->float_volume[1];
+			playing->declick_stage = 2;
+		} else {
+			playing->float_volume[0] = 0;
+			playing->float_volume[1] = 0;
+			playing->ramp_volume[0] = 0;
+			playing->ramp_volume[1] = 0;
+			playing->declick_stage = 5;
+		}
+		playing->ramp_delta[0] = 0;
+        playing->ramp_delta[1] = 0;
+    } else {
+        if (playing->declick_stage == 0) { /* stage 0: start the ramp from silence */
+            playing->ramp_volume[0] = 0;
+            playing->ramp_volume[1] = 0;
+            rampScale = 48;
+            playing->declick_stage++;
+        } else if (playing->declick_stage == 1) {
+            rampScale = 48;
+        } else if (playing->declick_stage >= 3) { /* stage 3 and above: ramp towards silence */
+            playing->float_volume[0] = 0;
+            playing->float_volume[1] = 0;
+            if (playing->declick_stage == 3)
+                playing->declick_stage++;
+            rampScale = 48;
+        }
+        playing->ramp_delta[0] = rampScale * invt2g * (playing->float_volume[0] - playing->ramp_volume[0]); /* step is proportional to the remaining distance */
+        playing->ramp_delta[1] = rampScale * invt2g * (playing->float_volume[1] - playing->ramp_volume[1]);
+    }
+}
+/* Per-tick update of one voice: envelopes and fadeout (instrument voices only), volume/ramp setup, and sample vibrato depth/time. */
+static void process_playing(DUMB_IT_SIGRENDERER *sigrenderer, IT_PLAYING *playing, float invt2g)
+{
+	DUMB_IT_SIGDATA * sigdata = sigrenderer->sigdata;
+
+	if (playing->instrument) {
+		if (sigdata->flags & IT_WAS_AN_XM)
+			update_xm_envelopes(playing);
+		else
+			update_it_envelopes(playing);
+		update_fadeout(sigdata, playing);
+	}
+
+	playing_volume_setup(sigrenderer, playing, invt2g);
+
+	if (sigdata->flags & IT_WAS_AN_XM) {
+		/* 'depth' is used to store the tick number for XM files. */
+		if (playing->sample_vibrato_depth < playing->sample->vibrato_rate)
+			playing->sample_vibrato_depth++;
+	} else {
+		playing->sample_vibrato_depth += playing->sample->vibrato_rate; /* depth grows by vibrato_rate each call... */
+		if (playing->sample_vibrato_depth > playing->sample->vibrato_depth << 8) /* ...up to the sample's depth << 8 */
+			playing->sample_vibrato_depth = playing->sample->vibrato_depth << 8;
+	}
+
+	playing->sample_vibrato_time += playing->sample->vibrato_speed;
+}
+
+/* Base-2 logarithm. Named mylog2 rather than log2 because some compilers
+ * reportedly have trouble with a local definition of that name. */
+static double mylog2(double x) {return log(x)/log(2.0);}
+
+/* Maps a playback delta to a note number clamped to 0..119; delta == base/65536 gives 60. */
+static int delta_to_note(float delta, int base)
+{
+	double note = mylog2(delta * 65536.f / (float)base)*12.0f+60.5f;
+	/* !(note >= 0) also catches NaN (e.g. 0/0 when delta and base are both
+	 * zero); converting NaN to int below would be undefined behaviour. */
+	if (!(note >= 0)) note = 0; else if (note > 119) note = 119;
+	return (int)note;
+}
+
+#if 0 /* reference tables, compiled out */
+// Period table for Protracker octaves 0-5:
+static const unsigned short ProTrackerPeriodTable[6*12] =
+{
+	1712,1616,1524,1440,1356,1280,1208,1140,1076,1016,960,907,
+	856,808,762,720,678,640,604,570,538,508,480,453,
+	428,404,381,360,339,320,302,285,269,254,240,226,
+	214,202,190,180,170,160,151,143,135,127,120,113,
+	107,101,95,90,85,80,75,71,67,63,60,56,
+	53,50,47,45,42,40,37,35,33,31,30,28
+};
+
+/* NOTE(review): 16 rows of 12 periods — presumably one row per finetune setting; confirm before re-enabling. */
+static const unsigned short ProTrackerTunedPeriods[16*12] = 
+{
+	1712,1616,1524,1440,1356,1280,1208,1140,1076,1016,960,907,
+	1700,1604,1514,1430,1348,1274,1202,1134,1070,1010,954,900,
+	1688,1592,1504,1418,1340,1264,1194,1126,1064,1004,948,894,
+	1676,1582,1492,1408,1330,1256,1184,1118,1056,996,940,888,
+	1664,1570,1482,1398,1320,1246,1176,1110,1048,990,934,882,
+	1652,1558,1472,1388,1310,1238,1168,1102,1040,982,926,874,
+	1640,1548,1460,1378,1302,1228,1160,1094,1032,974,920,868,
+	1628,1536,1450,1368,1292,1220,1150,1086,1026,968,914,862,
+	1814,1712,1616,1524,1440,1356,1280,1208,1140,1076,1016,960,
+	1800,1700,1604,1514,1430,1350,1272,1202,1134,1070,1010,954,
+	1788,1688,1592,1504,1418,1340,1264,1194,1126,1064,1004,948,
+	1774,1676,1582,1492,1408,1330,1256,1184,1118,1056,996,940,
+	1762,1664,1570,1482,1398,1320,1246,1176,1110,1048,988,934,
+	1750,1652,1558,1472,1388,1310,1238,1168,1102,1040,982,926,
+	1736,1640,1548,1460,1378,1302,1228,1160,1094,1032,974,920,
+	1724,1628,1536,1450,1368,1292,1220,1150,1086,1026,968,914 
+};
+#endif
+/* Per-tick voice update: derives volume, pan, pitch delta and filter settings for each channel's voice, then runs process_playing() on every voice and frees dead ones. */
+static void process_all_playing(DUMB_IT_SIGRENDERER *sigrenderer)
+{
+	DUMB_IT_SIGDATA *sigdata = sigrenderer->sigdata;
+	int i;
+
+	float invt2g = 1.0f / ((float)TICK_TIME_DIVIDEND / (float)sigrenderer->tempo / 256.0f);
+
+	for (i = 0; i < DUMB_IT_N_CHANNELS; i++) {
+		IT_CHANNEL *channel = &sigrenderer->channel[i];
+		IT_PLAYING *playing = channel->playing;
+
+		if (playing) {
+			int vibrato_shift;
+			switch (playing->vibrato_waveform)
+			{
+			default:
+				vibrato_shift = it_sine[playing->vibrato_time];
+				break;
+			case 1:
+				vibrato_shift = it_sawtooth[playing->vibrato_time];
+				break;
+			case 2:
+				vibrato_shift = it_squarewave[playing->vibrato_time];
+				break;
+			case 3:
+				vibrato_shift = (rand() % 129) - 64; /* random waveform: value in -64..64 */
+				break;
+			case 4:
+				vibrato_shift = it_xm_squarewave[playing->vibrato_time];
+				break;
+			case 5:
+				vibrato_shift = it_xm_ramp[playing->vibrato_time];
+				break;
+			case 6:
+				vibrato_shift = it_xm_ramp[255-playing->vibrato_time]; /* ramp table read in reverse */
+				break;
+			}
+			vibrato_shift *= playing->vibrato_n;
+			vibrato_shift *= playing->vibrato_depth;
+			vibrato_shift >>= 4;
+
+			if (sigdata->flags & IT_OLD_EFFECTS)
+				vibrato_shift = -vibrato_shift;
+
+			playing->volume = channel->volume;
+			playing->pan = channel->truepan;
+
+			if (playing->volume_offset) {
+				playing->volume += (playing->volume_offset * playing->volume) >> 7;
+				if (playing->volume > 64) { /* out of range after the offset: clamp according to the offset's sign */
+					if (playing->volume_offset < 0) playing->volume = 0;
+					else playing->volume = 64;
+				}
+			}
+
+			if (playing->panning_offset && !IT_IS_SURROUND_SHIFTED(playing->pan)) {
+				playing->pan += playing->panning_offset << IT_ENVELOPE_SHIFT;
+				if (playing->pan > 64 << IT_ENVELOPE_SHIFT) { /* out of range after the offset: clamp according to the offset's sign */
+					if (playing->panning_offset < 0) playing->pan = 0;
+					else playing->pan = 64 << IT_ENVELOPE_SHIFT;
+				}
+			}
+
+			if (sigdata->flags & IT_LINEAR_SLIDES) {
+				int currpitch = ((playing->note - 60) << 8) + playing->slide
+				                                            + vibrato_shift
+															+ playing->finetune;
+
+				/* We add a feature here, which is that of keeping the pitch
+				 * within range. Otherwise it crashes. Trust me. It happened.
+				 * The limit 32768 gives almost 11 octaves either way.
+				 */
+				if (currpitch < -32768)
+					currpitch = -32768;
+				else if (currpitch > 32767)
+					currpitch = 32767;
+
+				playing->delta = (float)pow(DUMB_PITCH_BASE, currpitch);
+				playing->delta *= playing->sample->C5_speed * (1.f / 65536.0f);
+			} else {
+				int slide = playing->slide + vibrato_shift;
+
+				playing->delta = (float)pow(DUMB_PITCH_BASE, ((60 - playing->note) << 8) - playing->finetune );
+				/* playing->delta is 1.0 for C-5, 0.5 for C-6, etc. */
+
+				playing->delta *= 1.0f / playing->sample->C5_speed;
+
+				playing->delta -= slide / AMIGA_DIVISOR;
+
+				if (playing->delta < (1.0f / 65536.0f) / 32768.0f) {
+					// Should XM notes die if Amiga slides go out of range?
+					playing->flags |= IT_PLAYING_DEAD;
+					playing->delta = 1. / 32768.;
+					continue; /* skips the glissando, arpeggio and filter updates for this voice */
+				}
+
+				playing->delta = (1.0f / 65536.0f) / playing->delta; /* the value so far was a period; invert it into a delta */
+			}
+
+			if (playing->channel->glissando && playing->channel->toneporta && playing->channel->destnote < 120) { /* glissando during tone portamento: quantise to a whole note number */
+				playing->delta = (float)pow(DUMB_SEMITONE_BASE, delta_to_note(playing->delta, playing->sample->C5_speed) - 60)
+					* playing->sample->C5_speed * (1.f / 65536.f);
+			}
+
+			/*
+			if ( channel->arpeggio ) { // another FT2 bug...
+				if ((sigdata->flags & (IT_LINEAR_SLIDES|IT_WAS_AN_XM|IT_WAS_A_MOD)) == (IT_WAS_AN_XM|IT_LINEAR_SLIDES) &&
+					playing->flags & IT_PLAYING_SUSTAINOFF)
+				{
+					if ( channel->arpeggio > 0xFF )
+						playing->delta = playing->sample->C5_speed * (1.f / 65536.f);
+				}
+				else*/
+				{
+					int tick = sigrenderer->tick - 1;
+					if ((sigrenderer->sigdata->flags & (IT_WAS_AN_XM|IT_WAS_A_MOD))!=IT_WAS_AN_XM) /* anything except a true XM */
+						tick = sigrenderer->speed - tick - 1;
+					else if (tick == sigrenderer->speed - 1)
+						tick = 0;
+					else
+						++tick;
+					playing->delta *= (float)pow(DUMB_SEMITONE_BASE, channel->arpeggio_offsets[channel->arpeggio_table[tick&31]]);
+				}
+			/*
+			}*/
+
+			playing->filter_cutoff = channel->filter_cutoff;
+			playing->filter_resonance = channel->filter_resonance;
+		}
+	}
+
+	for (i = 0; i < DUMB_IT_N_CHANNELS; i++) { /* foreground voices */
+		if (sigrenderer->channel[i].playing) {
+			process_playing(sigrenderer, sigrenderer->channel[i].playing, invt2g);
+			if (!(sigdata->flags & IT_WAS_AN_XM)) {
+				//if ((sigrenderer->channel[i].playing->flags & (IT_PLAYING_BACKGROUND | IT_PLAYING_DEAD)) == (IT_PLAYING_BACKGROUND | IT_PLAYING_DEAD)) {
+				// This change was made so Gxx would work correctly when a note faded out or whatever. Let's hope nothing else was broken by it.
+				if (sigrenderer->channel[i].playing->flags & IT_PLAYING_DEAD) {
+					free_playing(sigrenderer, sigrenderer->channel[i].playing);
+					sigrenderer->channel[i].playing = NULL;
+				}
+			}
+		}
+	}
+
+	for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) { /* background voices in sigrenderer->playing[] */
+		if (sigrenderer->playing[i]) {
+			process_playing(sigrenderer, sigrenderer->playing[i], invt2g);
+			if (sigrenderer->playing[i]->flags & IT_PLAYING_DEAD) {
+				free_playing(sigrenderer, sigrenderer->playing[i]);
+				sigrenderer->playing[i] = NULL;
+			}
+		}
+	}
+}
+
+/* Advances playback by one tick: on a new row it locates the row (moving to another order/pattern if needed) and processes its entries; otherwise it updates running effects. */
+/* Then refreshes all voices and adds the tick's length to time_left. Returns 1 when a loop / global-volume-zero callback or process_entry() returns nonzero, else 0. */
+static int process_tick(DUMB_IT_SIGRENDERER *sigrenderer)
+{
+	DUMB_IT_SIGDATA *sigdata = sigrenderer->sigdata;
+
+	// Set note vol/freq to vol/freq set for each channel
+
+	if (sigrenderer->speed && --sigrenderer->tick == 0) { /* the row's tick counter has expired */
+		reset_tick_counts(sigrenderer);
+		sigrenderer->tick = sigrenderer->speed;
+		sigrenderer->rowcount--;
+		if (sigrenderer->rowcount == 0) { /* no repeats of this row remain: move on to a new row */
+			sigrenderer->rowcount = 1;
+
+#ifdef BIT_ARRAY_BULLSHIT
+			if (sigrenderer->n_rows)
+			{
+#if 1
+				/*
+				if (bit_array_test(sigrenderer->played, sigrenderer->order * 256 + sigrenderer->row))
+				{
+					if (sigrenderer->callbacks->loop) {
+						if ((*sigrenderer->callbacks->loop)(sigrenderer->callbacks->loop_data))
+							return 1;
+						bit_array_reset(sigrenderer->played);
+						if (sigrenderer->speed == 0)
+							goto speed0; // I love goto
+					}
+				}
+				*/
+#endif
+				bit_array_set(sigrenderer->played, sigrenderer->order * 256 + sigrenderer->row);
+				{
+					int n;
+					for (n = 0; n < DUMB_IT_N_CHANNELS; n++)
+					{
+						IT_CHANNEL * channel = &sigrenderer->channel[n];
+						if (channel->played_patjump)
+						{
+							if (channel->played_patjump_order == sigrenderer->order)
+							{
+								bit_array_set(channel->played_patjump, sigrenderer->row);
+							}
+							/*
+							else if ((channel->played_patjump_order & 0x7FFF) == sigrenderer->order)
+							{
+								channel->played_patjump_order |= 0x4000;
+							}
+							else if ((channel->played_patjump_order & 0x3FFF) == sigrenderer->order)
+							{
+								if ((sigdata->flags & (IT_WAS_AN_XM|IT_WAS_A_MOD)) == IT_WAS_AN_XM)
+								{
+									// joy, was XM, pattern loop bug triggered break to row in same order 
+									bit_array_mask(sigrenderer->played, channel->played_patjump, sigrenderer->order * 256);
+								}
+								bit_array_destroy(channel->played_patjump);
+								channel->played_patjump = 0;
+								channel->played_patjump_order = 0xFFFE;
+							}
+							*/
+							else
+							{
+								bit_array_destroy(channel->played_patjump);
+								channel->played_patjump = 0;
+								channel->played_patjump_order = 0xFFFE;
+							}
+						}
+					}
+				}
+			}
+#endif
+
+			sigrenderer->processrow++;
+
+			if (sigrenderer->processrow >= sigrenderer->n_rows) { /* past the pattern's last row (or a special marker value): pick a new order */
+				IT_PATTERN *pattern;
+				int n;
+				int processorder = sigrenderer->processorder;
+
+				if ((sigrenderer->processrow|0xC00) == 0xFFFE + 1) { /* It was incremented above! */
+					sigrenderer->processrow = sigrenderer->breakrow;
+					sigrenderer->breakrow = 0;
+					for (n = 0; n < DUMB_IT_N_CHANNELS; n++) sigrenderer->channel[n].pat_loop_end_row = 0;
+				} else {
+					sigrenderer->processrow = sigrenderer->breakrow;
+					sigrenderer->breakrow = 0; // XXX lolwut
+				}
+
+				if (sigrenderer->processorder == 0xFFFF)
+					sigrenderer->processorder = sigrenderer->order - 1;
+
+				for (;;) { /* advance until an order that refers to an existing pattern is found */
+					sigrenderer->processorder++;
+
+					if (sigrenderer->processorder >= sigdata->n_orders) {
+						sigrenderer->processorder = sigrenderer->restart_position;
+						if (sigrenderer->processorder >= sigdata->n_orders) {
+							/* Restarting beyond end. We'll loop for now. */
+							sigrenderer->processorder = -1;
+							continue;
+						}
+						if (sigdata->flags & IT_WAS_AN_OKT) {
+							/* Reset some things */
+							sigrenderer->speed = sigdata->speed;
+							sigrenderer->tempo = sigdata->tempo;
+							for (n = 0; n < DUMB_IT_N_CHANNELS; n++) {
+								xm_note_off(sigdata, &sigrenderer->channel[n]);
+							}
+						}
+					}
+
+					n = sigdata->order[sigrenderer->processorder];
+
+					if (n < sigdata->n_patterns)
+						break;
+
+#ifdef INVALID_ORDERS_END_SONG
+					if (n != IT_ORDER_SKIP)
+#else
+					if (n == IT_ORDER_END)
+#endif
+					{
+						sigrenderer->processorder = sigrenderer->restart_position - 1; /* the loop's increment then lands on restart_position */
+					}
+
+#ifdef BIT_ARRAY_BULLSHIT
+					/* Fix play tracking and timekeeping for orders containing skip commands */
+					for (n = 0; n < 256; n++) {
+						bit_array_set(sigrenderer->played, sigrenderer->processorder * 256 + n);
+					}
+#endif
+				}
+
+				pattern = &sigdata->pattern[n];
+
+				n = sigrenderer->n_rows; /* remember the previous pattern's row count (0 when only just starting) */
+				sigrenderer->n_rows = pattern->n_rows;
+
+				if (sigrenderer->processrow >= sigrenderer->n_rows)
+					sigrenderer->processrow = 0;
+
+/** WARNING - everything pertaining to a new pattern initialised? */
+
+				sigrenderer->entry = sigrenderer->entry_start = pattern->entry;
+				sigrenderer->entry_end = sigrenderer->entry + pattern->n_entries;
+
+				/* If n_rows was 0, we're only just starting. Don't do anything weird here. */
+				/* added: process row check, for break to row spooniness */
+				if (n && (processorder == 0xFFFF ? sigrenderer->order > sigrenderer->processorder : sigrenderer->order >= sigrenderer->processorder)
+#ifdef BIT_ARRAY_BULLSHIT
+					&& bit_array_test(sigrenderer->played, sigrenderer->processorder * 256 + sigrenderer->processrow)
+#endif
+					) {
+					if (sigrenderer->callbacks->loop) {
+						if ((*sigrenderer->callbacks->loop)(sigrenderer->callbacks->loop_data))
+							return 1;
+#ifdef BIT_ARRAY_BULLSHIT
+						bit_array_reset(sigrenderer->played);
+#endif
+						if (sigrenderer->speed == 0)
+							goto speed0; /* I love goto */
+					}
+				}
+				sigrenderer->order = sigrenderer->processorder;
+
+				n = sigrenderer->processrow;
+				while (n) { /* skip n rows forward in the pattern's entry stream */
+					while (sigrenderer->entry < sigrenderer->entry_end) {
+						if (IT_IS_END_ROW(sigrenderer->entry)) {
+							sigrenderer->entry++;
+							break;
+						}
+						sigrenderer->entry++;
+					}
+					n--;
+				}
+				sigrenderer->row = sigrenderer->processrow;
+			} else {
+				if (sigrenderer->entry) { /* step past the remainder of the current row */
+					while (sigrenderer->entry < sigrenderer->entry_end) {
+						if (IT_IS_END_ROW(sigrenderer->entry)) {
+							sigrenderer->entry++;
+							break;
+						}
+						sigrenderer->entry++;
+					}
+					sigrenderer->row++;
+				} else {
+#ifdef BIT_ARRAY_BULLSHIT
+					bit_array_clear(sigrenderer->played, sigrenderer->order * 256);
+#endif
+					sigrenderer->entry = sigrenderer->entry_start;
+					sigrenderer->row = 0;
+				}
+			}
+
+			if (!(sigdata->flags & IT_WAS_A_669))
+				reset_effects(sigrenderer);
+
+			{
+				IT_ENTRY *entry = sigrenderer->entry;
+				int ignore_cxx = 0;
+
+				while (entry < sigrenderer->entry_end && !IT_IS_END_ROW(entry)) /* first pass over the row: update_pattern_variables() */
+					ignore_cxx |= update_pattern_variables(sigrenderer, entry++);
+
+				entry = sigrenderer->entry;
+
+				while (entry < sigrenderer->entry_end && !IT_IS_END_ROW(entry)) /* second pass: process each entry; XM never ignores Cxx */
+					if (process_entry(sigrenderer, entry++, sigdata->flags & IT_WAS_AN_XM ? 0 : ignore_cxx))
+						return 1;
+			}
+
+			if (sigdata->flags & IT_WAS_AN_OKT)
+				update_effects(sigrenderer);
+			else if (!(sigdata->flags & IT_OLD_EFFECTS))
+				update_smooth_effects(sigrenderer);
+		} else {
+			{
+				IT_ENTRY *entry = sigrenderer->entry;
+
+				while (entry < sigrenderer->entry_end && !IT_IS_END_ROW(entry)) {
+					if (entry->mask & IT_ENTRY_EFFECT && entry->effect != IT_SET_SAMPLE_OFFSET)
+						process_effects(sigrenderer, entry, 0);
+							/* Don't bother checking the return value; if there
+							 * was a pattern delay, there can't be a speed=0.
+							 */
+					entry++;
+				}
+			}
+
+			update_effects(sigrenderer);
+		}
+	} else {
+		if ( !(sigdata->flags & IT_WAS_AN_STM) || !(sigrenderer->tick & 15)) {
+			speed0:
+			update_effects(sigrenderer);
+			update_tick_counts(sigrenderer);
+		}
+	}
+
+	if (sigrenderer->globalvolume == 0) {
+		if (sigrenderer->callbacks->global_volume_zero) {
+			LONG_LONG t = sigrenderer->gvz_sub_time + ((TICK_TIME_DIVIDEND / (sigrenderer->tempo << 8)) << 16);
+			sigrenderer->gvz_time += (int)(t >> 16);
+			sigrenderer->gvz_sub_time = (int)t & 65535;
+			if (sigrenderer->gvz_time >= 65536 * 12) { /* global volume has stayed at zero for long enough: ask the callback */
+				if ((*sigrenderer->callbacks->global_volume_zero)(sigrenderer->callbacks->global_volume_zero_data))
+					return 1;
+			}
+		}
+	} else {
+		if (sigrenderer->callbacks->global_volume_zero) {
+			sigrenderer->gvz_time = 0;
+			sigrenderer->gvz_sub_time = 0;
+		}
+	}
+
+	process_all_playing(sigrenderer);
+
+	{
+		LONG_LONG t = (TICK_TIME_DIVIDEND / (sigrenderer->tempo << 8)) << 16; /* this tick's length with 16 fractional bits */
+		if ( sigrenderer->sigdata->flags & IT_WAS_AN_STM ) {
+			t /= 16;
+		}
+		t += sigrenderer->sub_time_left;
+		sigrenderer->time_left += (int)(t >> 16); /* whole part */
+		sigrenderer->sub_time_left = (int)t & 65535; /* fractional part carried to the next tick */
+	}
+
+	return 0;
+}
+
+
+
+int dumb_it_max_to_mix = 64; /* NOTE(review): presumably the cap on simultaneously mixed voices — confirm at the use site */
+
+#if 0 /* compiled out */
+static const int aiMODVol[] =
+{
+	0,
+		16, 24, 32, 48, 64, 80, 96, 112,
+		128, 144, 160, 176, 192, 208, 224, 240,
+		256, 272, 288, 304, 320, 336, 352, 368,
+		384, 400, 416, 432, 448, 464, 480, 496,
+		529, 545, 561, 577, 593, 609, 625, 641,
+		657, 673, 689, 705, 721, 737, 753, 769,
+		785, 801, 817, 833, 849, 865, 881, 897,
+		913, 929, 945, 961, 977, 993, 1009, 1024
+};
+#endif
+/* PTM volume curve: 65 entries indexed by volume 0..64, values 0..1024; calculate_volume() interpolates between entries. */
+static const int aiPTMVolScaled[] =
+{
+	0,
+		31, 54, 73, 96, 111, 130, 153, 172,
+		191, 206, 222, 237, 252, 275, 298, 317,
+		336, 351, 370, 386, 401, 416, 428, 443,
+		454, 466, 477, 489, 512, 531, 553, 573,
+		592, 611, 626, 645, 660, 679, 695, 710,
+		725, 740, 756, 767, 782, 798, 809, 820,
+		836, 847, 859, 870, 881, 897, 908, 916,
+		927, 939, 950, 962, 969, 983, 1005, 1024
+};
+/* Scales 'volume' by the voice's tremolo-adjusted volume, the sample/channel/global/mixing volumes and, for instrument voices, envelope, instrument volume and fadeout. Returns 0 for muted or tremor-silenced channels. */
+static float calculate_volume(DUMB_IT_SIGRENDERER *sigrenderer, IT_PLAYING *playing, double volume)
+{
+	if (volume != 0) {
+		int vol;
+
+		if (playing->channel->flags & IT_CHANNEL_MUTED)
+			return 0;
+
+		if ((playing->channel->tremor_time & 192) == 128)
+			return 0;
+
+		switch (playing->tremolo_waveform)
+		{
+		default:
+			vol = it_sine[playing->tremolo_time];
+			break;
+		case 1:
+			vol = it_sawtooth[playing->tremolo_time];
+			break;
+		case 2:
+			vol = it_squarewave[playing->tremolo_time];
+			break;
+		case 3:
+			vol = (rand() % 129) - 64; /* random waveform: value in -64..64 */
+			break;
+		case 4:
+			vol = it_xm_squarewave[playing->tremolo_time];
+			break;
+		case 5:
+			vol = it_xm_ramp[playing->tremolo_time];
+			break;
+		case 6: /* reversed ramp; MOD files index it with vibrato_time instead of tremolo_time */
+			vol = it_xm_ramp[255-((sigrenderer->sigdata->flags & IT_WAS_A_MOD)?playing->vibrato_time:playing->tremolo_time)];
+			break;
+		}
+		vol *= playing->tremolo_depth;
+
+		vol = (playing->volume << 5) + vol; /* volume with 5 fractional bits, plus the tremolo offset */
+
+		if (vol <= 0)
+			return 0;
+
+		if (vol > 64 << 5)
+			vol = 64 << 5;
+
+		if ( sigrenderer->sigdata->flags & IT_WAS_A_PTM )
+		{
+			int v = aiPTMVolScaled[ vol >> 5 ];
+			if ( vol < 64 << 5 ) /* below the top entry: interpolate linearly towards the next table entry */
+			{
+				int f = vol & ( ( 1 << 5 ) - 1 );
+				int f2 = ( 1 << 5 ) - f;
+				int v2 = aiPTMVolScaled[ ( vol >> 5 ) + 1 ];
+				v = ( v * f2 + v2 * f ) >> 5;
+			}
+			vol = v << 1; /* table tops out at 1024, so this restores the 64 << 5 scale */
+		}
+
+		volume *= vol; /* 64 << 5 */
+		volume *= playing->sample->global_volume; /* 64 */
+		volume *= playing->channel_volume; /* 64 */
+		volume *= sigrenderer->globalvolume; /* 128 */
+		volume *= sigrenderer->sigdata->mixing_volume; /* 128 */
+		volume *= 1.0f / ((64 << 5) * 64.0f * 64.0f * 128.0f * 128.0f); /* divide out the maxima noted above */
+
+		if (volume && playing->instrument) {
+			if (playing->enabled_envelopes & IT_ENV_VOLUME && playing->env_instrument->volume_envelope.n_nodes) {
+				volume *= envelope_get_y(&playing->env_instrument->volume_envelope, &playing->volume_envelope);
+				volume *= 1.0f / (64 << IT_ENVELOPE_SHIFT);
+			}
+			volume *= playing->instrument->global_volume; /* 128 */
+			volume *= playing->fadeoutcount; /* 1024 */
+			volume *= 1.0f / (128.0f * 1024.0f);
+		}
+	}
+
+	return (float)volume;
+}
+
+
+
+/* Returns the voice's pan after panbrello and the panning envelope.
+ *
+ * Pan values above 64 << IT_ENVELOPE_SHIFT are returned untouched. Otherwise
+ * the panbrello offset (if any) is added and clamped to the valid range, and
+ * the pan envelope (if enabled) pushes the result towards either edge in
+ * proportion to the remaining room on that side.
+ */
+static int apply_pan_envelope(IT_PLAYING *playing)
+{
+	const int pan_max = 64 << IT_ENVELOPE_SHIFT;
+	int result;
+
+	if (playing->pan > pan_max)
+		return playing->pan;
+
+	result = playing->pan;
+
+	if (playing->panbrello_depth) {
+		int wave;
+
+		switch (playing->panbrello_waveform) {
+		case 1:
+			wave = it_sawtooth[playing->panbrello_time];
+			break;
+		case 2:
+			wave = it_squarewave[playing->panbrello_time];
+			break;
+		case 3:
+			wave = playing->panbrello_random;
+			break;
+		default:
+			wave = it_sine[playing->panbrello_time];
+			break;
+		}
+		wave *= playing->panbrello_depth << 3;
+
+		result += wave;
+		if (result < 0)
+			result = 0;
+		else if (result > pan_max)
+			result = pan_max;
+	}
+
+	if (playing->env_instrument && (playing->enabled_envelopes & IT_ENV_PANNING)) {
+		int env = envelope_get_y(&playing->env_instrument->pan_envelope, &playing->pan_envelope);
+
+		/* Scale by the distance to the nearer edge so the result stays in range. */
+		if (result > 32 << IT_ENVELOPE_SHIFT)
+			env *= pan_max - result;
+		else
+			env *= result;
+		result += env >> (5 + IT_ENVELOPE_SHIFT);
+	}
+
+	return result;
+}
+
+
+/* Renders (or silently advances) one playing voice.
+ *
+ *   volume           master gain; forced to 0 once *left_to_mix is exhausted.
+ *                    With a zero gain the resampler is only advanced and
+ *                    nothing is written to 'samples'.
+ *   main_delta       scales the per-voice ramp deltas.
+ *   delta            resampling step handed to the resampler.
+ *   pos, size        start frame in samples[0] and number of frames requested.
+ *   store_end_sample when non-zero, the sample following the rendered span is
+ *                    also written at index pos + size_rendered.
+ *   left_to_mix      budget of audible voices; decremented for each voice
+ *                    rendered with a non-zero gain.
+ *
+ * Returns the number of frames the resampler reported as rendered. The voice
+ * is flagged IT_PLAYING_DEAD when de-clicking has finished or when the
+ * resampler has stopped (dir == 0).
+ *
+ * Note: if a click remover is provided, and store_end_sample is set, then
+ * the end point will be computed twice. This situation should not arise.
+ */
+static int32 render_playing(DUMB_IT_SIGRENDERER *sigrenderer, IT_PLAYING *playing, double volume, double main_delta, double delta, int32 pos, int32 size, sample_t **samples, int store_end_sample, int *left_to_mix)
+{
+	int bits;
+
+	/* Must start at 0: with the mono mixers compiled out below, an audible
+	 * voice on a renderer with fewer than two channels reaches the return
+	 * statement without any resampler call assigning this. */
+	int32 size_rendered = 0;
+
+	DUMB_VOLUME_RAMP_INFO lvol, rvol;
+
+	if (playing->flags & IT_PLAYING_DEAD)
+		return 0;
+
+	if (*left_to_mix <= 0)
+		volume = 0;
+
+	{
+		/* Cap the renderer-wide quality by the sample's own limit, if any. */
+		int quality = sigrenderer->resampling_quality;
+		if (playing->sample->max_resampling_quality >= 0 && quality > playing->sample->max_resampling_quality)
+			quality = playing->sample->max_resampling_quality;
+		playing->resampler.quality = quality;
+		resampler_set_quality(playing->resampler.fir_resampler[0], quality - DUMB_RESAMPLER_BASE);
+		resampler_set_quality(playing->resampler.fir_resampler[1], quality - DUMB_RESAMPLER_BASE);
+	}
+
+	bits = playing->sample->flags & IT_SAMPLE_16BIT ? 16 : 8;
+
+	if (volume == 0) {
+		/* Silent: advance the sample position only (NULL destination). */
+		if (playing->sample->flags & IT_SAMPLE_STEREO)
+			size_rendered = dumb_resample_n_2_2(bits, &playing->resampler, NULL, size, 0, 0, delta);
+		else
+			size_rendered = dumb_resample_n_1_2(bits, &playing->resampler, NULL, size, 0, 0, delta);
+	} else {
+		lvol.volume = playing->ramp_volume [0];
+		rvol.volume = playing->ramp_volume [1];
+		lvol.delta  = (float)(playing->ramp_delta [0] * main_delta);
+		rvol.delta  = (float)(playing->ramp_delta [1] * main_delta);
+		lvol.target = playing->float_volume [0];
+		rvol.target = playing->float_volume [1];
+		rvol.mix = lvol.mix = (float)volume;
+		lvol.declick_stage = rvol.declick_stage = playing->declick_stage;
+		if (sigrenderer->n_channels >= 2) {
+			if (playing->sample->flags & IT_SAMPLE_STEREO) {
+				if (sigrenderer->click_remover) {
+					sample_t click[2];
+					dumb_resample_get_current_sample_n_2_2(bits, &playing->resampler, &lvol, &rvol, click);
+					dumb_record_click(sigrenderer->click_remover[0], pos, click[0]);
+					dumb_record_click(sigrenderer->click_remover[1], pos, click[1]);
+				}
+				size_rendered = dumb_resample_n_2_2(bits, &playing->resampler, samples[0] + pos*2, size, &lvol, &rvol, delta);
+				if (store_end_sample) {
+					sample_t click[2];
+					dumb_resample_get_current_sample_n_2_2(bits, &playing->resampler, &lvol, &rvol, click);
+					samples[0][(pos + size_rendered) * 2] = click[0];
+					samples[0][(pos + size_rendered) * 2 + 1] = click[1];
+				}
+				if (sigrenderer->click_remover) {
+					sample_t click[2];
+					dumb_resample_get_current_sample_n_2_2(bits, &playing->resampler, &lvol, &rvol, click);
+					dumb_record_click(sigrenderer->click_remover[0], pos + size_rendered, -click[0]);
+					dumb_record_click(sigrenderer->click_remover[1], pos + size_rendered, -click[1]);
+				}
+			} else {
+				if (sigrenderer->click_remover) {
+					sample_t click[2];
+					dumb_resample_get_current_sample_n_1_2(bits, &playing->resampler, &lvol, &rvol, click);
+					dumb_record_click(sigrenderer->click_remover[0], pos, click[0]);
+					dumb_record_click(sigrenderer->click_remover[1], pos, click[1]);
+				}
+				size_rendered = dumb_resample_n_1_2(bits, &playing->resampler, samples[0] + pos*2, size, &lvol, &rvol, delta);
+				if (store_end_sample) {
+					sample_t click[2];
+					dumb_resample_get_current_sample_n_1_2(bits, &playing->resampler, &lvol, &rvol, click);
+					samples[0][(pos + size_rendered) * 2] = click[0];
+					samples[0][(pos + size_rendered) * 2 + 1] = click[1];
+				}
+				if (sigrenderer->click_remover) {
+					sample_t click[2];
+					dumb_resample_get_current_sample_n_1_2(bits, &playing->resampler, &lvol, &rvol, click);
+					dumb_record_click(sigrenderer->click_remover[0], pos + size_rendered, -click[0]);
+					dumb_record_click(sigrenderer->click_remover[1], pos + size_rendered, -click[1]);
+				}
+			}
+		}
+#if 0	// [RH] Don't need mono output
+		else {
+			if (playing->sample->flags & IT_SAMPLE_STEREO) {
+				if (sigrenderer->click_remover) {
+					sample_t click;
+					dumb_resample_get_current_sample_n_2_1(bits, &playing->resampler, &lvol, &rvol, &click);
+					dumb_record_click(sigrenderer->click_remover[0], pos, click);
+				}
+				size_rendered = dumb_resample_n_2_1(bits, &playing->resampler, samples[0] + pos, size, &lvol, &rvol, delta);
+				if (store_end_sample)
+					dumb_resample_get_current_sample_n_2_1(bits, &playing->resampler, &lvol, &rvol, &samples[0][pos + size_rendered]);
+				if (sigrenderer->click_remover) {
+					sample_t click;
+					dumb_resample_get_current_sample_n_2_1(bits, &playing->resampler, &lvol, &rvol, &click);
+					dumb_record_click(sigrenderer->click_remover[0], pos + size_rendered, -click);
+				}
+			} else {
+				if (sigrenderer->click_remover) {
+					sample_t click;
+					dumb_resample_get_current_sample_n_1_1(bits, &playing->resampler, &lvol, &click);
+					dumb_record_click(sigrenderer->click_remover[0], pos, click);
+				}
+				size_rendered = dumb_resample_n_1_1(bits, &playing->resampler, samples[0] + pos, size, &lvol, delta);
+				if (store_end_sample)
+					dumb_resample_get_current_sample_n_1_1(bits, &playing->resampler, &lvol, &samples[0][pos + size_rendered]);
+				if (sigrenderer->click_remover) {
+					sample_t click;
+					dumb_resample_get_current_sample_n_1_1(bits, &playing->resampler, &lvol, &click);
+					dumb_record_click(sigrenderer->click_remover[0], pos + size_rendered, -click);
+				}
+			}
+		}
+#endif
+		/* Carry the ramp and de-click state over to the next call. */
+		playing->ramp_volume [0] = lvol.volume;
+		playing->ramp_volume [1] = rvol.volume;
+		playing->declick_stage = (lvol.declick_stage > rvol.declick_stage) ? lvol.declick_stage : rvol.declick_stage;
+		if (playing->declick_stage >= 4)
+			playing->flags |= IT_PLAYING_DEAD;
+		(*left_to_mix)--;
+	}
+
+	if (playing->resampler.dir == 0)
+		playing->flags |= IT_PLAYING_DEAD;
+
+	return size_rendered;
+}
+
+/* One voice queued for mixing together with the gain computed for it.
+ * Arrays of these are ordered with it_to_mix_compare().
+ */
+typedef struct IT_TO_MIX
+{
+	IT_PLAYING *playing; /* the voice to render */
+	float volume;        /* gain from calculate_volume(), or 0 when rendering silently */
+}
+IT_TO_MIX;
+
+
+
+/* qsort() comparator for IT_TO_MIX entries: orders by descending volume.
+ * Returns -1 when e1 is louder, 1 when e2 is louder and 0 otherwise.
+ */
+static int CDECL it_to_mix_compare(const void *e1, const void *e2)
+{
+	const IT_TO_MIX *lhs = e1;
+	const IT_TO_MIX *rhs = e2;
+
+	return (lhs->volume < rhs->volume) - (lhs->volume > rhs->volume);
+}
+
+
+
+/* Applies sample auto-vibrato and the pitch envelope to a voice.
+ *
+ * '*delta' is the voice's playback step and is adjusted in place. When the
+ * instrument's pitch envelope is flagged IT_ENVELOPE_PITCH_IS_FILTER, the
+ * envelope scales '*cutoff' instead of the pitch.
+ */
+static void apply_pitch_modifications(DUMB_IT_SIGDATA *sigdata, IT_PLAYING *playing, double *delta, int *cutoff)
+{
+	int shift;
+
+	/* Raw auto-vibrato offset from the sample's waveform. */
+	switch (playing->sample_vibrato_waveform)
+	{
+	case 1:
+		shift = it_sawtooth[playing->sample_vibrato_time];
+		break;
+	case 2:
+		shift = it_squarewave[playing->sample_vibrato_time];
+		break;
+	case 3:
+		shift = (rand() % 129) - 64;
+		break;
+	case 4:
+		shift = it_xm_squarewave[playing->sample_vibrato_time];
+		break;
+	case 5:
+		shift = it_xm_ramp[playing->sample_vibrato_time];
+		break;
+	case 6:
+		shift = it_xm_ramp[255-playing->sample_vibrato_time];
+		break;
+	default:
+		shift = it_sine[playing->sample_vibrato_time];
+		break;
+	}
+
+	if (sigdata->flags & IT_WAS_AN_XM) {
+		int xm_depth = playing->sample->vibrato_depth; /* True depth */
+		if (playing->sample->vibrato_rate) {
+			xm_depth *= playing->sample_vibrato_depth; /* Tick number */
+			xm_depth /= playing->sample->vibrato_rate; /* XM sweep */
+		}
+		shift *= xm_depth;
+	} else {
+		shift *= playing->sample_vibrato_depth >> 8;
+	}
+
+	shift >>= 4;
+
+	if (shift) {
+		if ((sigdata->flags & IT_WAS_AN_XM) && !(sigdata->flags & IT_LINEAR_SLIDES)) {
+			/* XM without linear slides: apply the offset in the period
+			 * domain, then convert back, keeping the ratio the caller had
+			 * already applied on top of playing->delta. */
+			double ratio = *delta / playing->delta;
+
+			*delta = (1.0f / 65536.0f) / playing->delta;
+
+			*delta -= shift / AMIGA_DIVISOR;
+
+			if (*delta < (1.0f / 65536.0f) / 32767.0f) {
+				*delta = (1.0f / 65536.0f) / 32767.0f;
+			}
+
+			*delta = (1.0f / 65536.0f) / *delta * ratio;
+		} else {
+			*delta *= (float)pow(DUMB_PITCH_BASE, shift);
+		}
+	}
+
+	if (playing->env_instrument &&
+		(playing->enabled_envelopes & IT_ENV_PITCH))
+	{
+		int env = envelope_get_y(&playing->env_instrument->pitch_envelope, &playing->pitch_envelope);
+
+		if (playing->env_instrument->pitch_envelope.flags & IT_ENVELOPE_PITCH_IS_FILTER)
+			*cutoff = (*cutoff * (env+(32<<IT_ENVELOPE_SHIFT))) >> (6 + IT_ENVELOPE_SHIFT);
+		else
+			*delta *= (float)pow(DUMB_PITCH_BASE, env >> (IT_ENVELOPE_SHIFT - 7));
+	}
+}
+
+
+
+/* Mixes every live voice of a 1- or 2-channel renderer into samples[0].
+ *
+ * Voices are gathered from the pattern channels and the NNA slots, given a
+ * gain by calculate_volume() and, when 'volume' is non-zero, sorted loudest
+ * first so that the left_to_mix budget (dumb_it_max_to_mix) is spent on the
+ * loudest voices. Voices with an active filter are rendered into a scratch
+ * buffer and passed through it_filter(); the others are rendered straight
+ * into the output. Voices flagged IT_PLAYING_DEAD afterwards are released.
+ *
+ * 'pos' and 'size' are the start frame and frame count within 'samples'.
+ */
+static void render_normal(DUMB_IT_SIGRENDERER *sigrenderer, double volume, double delta, int32 pos, int32 size, sample_t **samples)
+{
+	int i;
+
+	int n_to_mix = 0;
+	IT_TO_MIX to_mix[DUMB_IT_TOTAL_CHANNELS];
+	int left_to_mix = dumb_it_max_to_mix;
+
+	/* Scratch buffer for filtered voices; allocated on first use. */
+	sample_t **samples_to_filter = NULL;
+
+	//int max_output = sigrenderer->max_output;
+
+	for (i = 0; i < DUMB_IT_N_CHANNELS; i++) {
+		if (sigrenderer->channel[i].playing && !(sigrenderer->channel[i].playing->flags & IT_PLAYING_DEAD)) {
+			to_mix[n_to_mix].playing = sigrenderer->channel[i].playing;
+			to_mix[n_to_mix].volume = volume == 0 ? 0 : calculate_volume(sigrenderer, sigrenderer->channel[i].playing, volume);
+			n_to_mix++;
+		}
+	}
+
+	for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+		if (sigrenderer->playing[i]) { /* Won't be dead; it would have been freed. */
+			to_mix[n_to_mix].playing = sigrenderer->playing[i];
+			to_mix[n_to_mix].volume = volume == 0 ? 0 : calculate_volume(sigrenderer, sigrenderer->playing[i], volume);
+			n_to_mix++;
+		}
+	}
+
+	if (volume != 0)
+		qsort(to_mix, n_to_mix, sizeof(IT_TO_MIX), &it_to_mix_compare);
+
+	for (i = 0; i < n_to_mix; i++) {
+		IT_PLAYING *playing = to_mix[i].playing;
+		double note_delta = delta * playing->delta;
+		int cutoff = playing->filter_cutoff << IT_ENVELOPE_SHIFT;
+		//int output = min( playing->output, max_output );
+
+		apply_pitch_modifications(sigrenderer->sigdata, playing, &note_delta, &cutoff);
+
+		/* The effective filter settings are only replaced when the new ones
+		 * differ from "fully open, no resonance". */
+		if (cutoff != 127 << IT_ENVELOPE_SHIFT || playing->filter_resonance != 0) {
+			playing->true_filter_cutoff = cutoff;
+			playing->true_filter_resonance = playing->filter_resonance;
+		}
+
+		if (volume && (playing->true_filter_cutoff != 127 << IT_ENVELOPE_SHIFT || playing->true_filter_resonance != 0)) {
+			if (!samples_to_filter) {
+				samples_to_filter = allocate_sample_buffer(sigrenderer->n_channels, size + 1);
+				if (!samples_to_filter) {
+					/* No scratch buffer: advance the voice silently instead. */
+					render_playing(sigrenderer, playing, 0, delta, note_delta, pos, size, NULL, 0, &left_to_mix);
+					continue;
+				}
+			}
+			{
+				int32 size_rendered;
+				DUMB_CLICK_REMOVER **cr = sigrenderer->click_remover;
+				dumb_silence(samples_to_filter[0], sigrenderer->n_channels * (size + 1));
+				/* Hide the click removers while rendering into the scratch
+				 * buffer; it_filter() receives them instead. */
+				sigrenderer->click_remover = NULL;
+				size_rendered = render_playing(sigrenderer, playing, volume, delta, note_delta, 0, size, samples_to_filter, 1, &left_to_mix);
+				sigrenderer->click_remover = cr;
+				if (sigrenderer->n_channels == 2) {
+					it_filter(cr ? cr[0] : NULL, &playing->filter_state[0], samples[0 /*output*/], pos, samples_to_filter[0], size_rendered,
+						2, (int)(65536.0f/delta), playing->true_filter_cutoff, playing->true_filter_resonance);
+					it_filter(cr ? cr[1] : NULL, &playing->filter_state[1], samples[0 /*output*/]+1, pos, samples_to_filter[0]+1, size_rendered,
+						2, (int)(65536.0f/delta), playing->true_filter_cutoff, playing->true_filter_resonance);
+				} else {
+					it_filter(cr ? cr[0] : NULL, &playing->filter_state[0], samples[0 /*output*/], pos, samples_to_filter[0], size_rendered,
+						1, (int)(65536.0f/delta), playing->true_filter_cutoff, playing->true_filter_resonance);
+				}
+				// FIXME: filtering is not prevented by low left_to_mix!
+				// FIXME: change 'warning' to 'FIXME' everywhere
+			}
+		} else {
+			it_reset_filter_state(&playing->filter_state[0]);
+			it_reset_filter_state(&playing->filter_state[1]);
+			render_playing(sigrenderer, playing, volume, delta, note_delta, pos, size, samples /*&samples[output]*/, 0, &left_to_mix);
+		}
+	}
+
+	destroy_sample_buffer(samples_to_filter);
+
+	for (i = 0; i < DUMB_IT_N_CHANNELS; i++) {
+		if (sigrenderer->channel[i].playing) {
+			//if ((sigrenderer->channel[i].playing->flags & (IT_PLAYING_BACKGROUND | IT_PLAYING_DEAD)) == (IT_PLAYING_BACKGROUND | IT_PLAYING_DEAD)) {
+			// This change was made so Gxx would work correctly when a note faded out or whatever. Let's hope nothing else was broken by it.
+			if (sigrenderer->channel[i].playing->flags & IT_PLAYING_DEAD) {
+				free_playing(sigrenderer, sigrenderer->channel[i].playing);
+				sigrenderer->channel[i].playing = NULL;
+			}
+		}
+	}
+
+	for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+		if (sigrenderer->playing[i]) {
+			if (sigrenderer->playing[i]->flags & IT_PLAYING_DEAD) {
+				free_playing(sigrenderer, sigrenderer->playing[i]);
+				sigrenderer->playing[i] = NULL;
+			}
+		}
+	}
+}
+
+
+
+/* Mixes a 3-channel renderer: a stereo pair in samples[0] plus a separate
+ * surround channel in samples[1].
+ *
+ * Voices whose pan satisfies IT_IS_SURROUND_SHIFTED() go to the surround
+ * list, all others to the normal list; each list is sorted loudest first
+ * when 'volume' is non-zero. The normal list is rendered with n_channels
+ * temporarily set to 2, the surround list with n_channels set to 1 and the
+ * click-remover array advanced by two entries. Both fields are restored
+ * before dead voices are released.
+ *
+ * NOTE(review): render_playing() has its mono mixers compiled out in this
+ * file and init_sigrenderer() only accepts n_channels == 2, so the surround
+ * pass looks unreachable/inoperative here - confirm before relying on it.
+ */
+static void render_surround(DUMB_IT_SIGRENDERER *sigrenderer, double volume, double delta, int32 pos, int32 size, sample_t **samples)
+{
+	int i;
+
+	int n_to_mix = 0, n_to_mix_surround = 0;
+	IT_TO_MIX to_mix[DUMB_IT_TOTAL_CHANNELS];
+	IT_TO_MIX to_mix_surround[DUMB_IT_TOTAL_CHANNELS];
+	int left_to_mix = dumb_it_max_to_mix;
+
+	int saved_channels = sigrenderer->n_channels;
+
+	/* Scratch buffer for filtered voices; allocated on first use and shared
+	 * by both passes. */
+	sample_t **samples_to_filter = NULL;
+
+	DUMB_CLICK_REMOVER **saved_cr = sigrenderer->click_remover;
+
+	//int max_output = sigrenderer->max_output;
+
+	for (i = 0; i < DUMB_IT_N_CHANNELS; i++) {
+		if (sigrenderer->channel[i].playing && !(sigrenderer->channel[i].playing->flags & IT_PLAYING_DEAD)) {
+			IT_PLAYING *playing = sigrenderer->channel[i].playing;
+			IT_TO_MIX *_to_mix = IT_IS_SURROUND_SHIFTED(playing->pan) ? to_mix_surround + n_to_mix_surround++ : to_mix + n_to_mix++;
+			_to_mix->playing = playing;
+			_to_mix->volume = volume == 0 ? 0 : calculate_volume(sigrenderer, playing, volume);
+		}
+	}
+
+	for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+		if (sigrenderer->playing[i]) { /* Won't be dead; it would have been freed. */
+			IT_PLAYING *playing = sigrenderer->playing[i];
+			IT_TO_MIX *_to_mix = IT_IS_SURROUND_SHIFTED(playing->pan) ? to_mix_surround + n_to_mix_surround++ : to_mix + n_to_mix++;
+			_to_mix->playing = playing;
+			_to_mix->volume = volume == 0 ? 0 : calculate_volume(sigrenderer, playing, volume);
+		}
+	}
+
+	if (volume != 0) {
+		qsort(to_mix, n_to_mix, sizeof(IT_TO_MIX), &it_to_mix_compare);
+		qsort(to_mix_surround, n_to_mix_surround, sizeof(IT_TO_MIX), &it_to_mix_compare);
+	}
+
+	/* Pass 1: normal voices, rendered as stereo into samples[0]. */
+	sigrenderer->n_channels = 2;
+
+	for (i = 0; i < n_to_mix; i++) {
+		IT_PLAYING *playing = to_mix[i].playing;
+		double note_delta = delta * playing->delta;
+		int cutoff = playing->filter_cutoff << IT_ENVELOPE_SHIFT;
+		//int output = min( playing->output, max_output );
+
+		apply_pitch_modifications(sigrenderer->sigdata, playing, &note_delta, &cutoff);
+
+		if (cutoff != 127 << IT_ENVELOPE_SHIFT || playing->filter_resonance != 0) {
+			playing->true_filter_cutoff = cutoff;
+			playing->true_filter_resonance = playing->filter_resonance;
+		}
+
+		if (volume && (playing->true_filter_cutoff != 127 << IT_ENVELOPE_SHIFT || playing->true_filter_resonance != 0)) {
+			if (!samples_to_filter) {
+				samples_to_filter = allocate_sample_buffer(sigrenderer->n_channels, size + 1);
+				if (!samples_to_filter) {
+					/* No scratch buffer: advance the voice silently instead. */
+					render_playing(sigrenderer, playing, 0, delta, note_delta, pos, size, NULL, 0, &left_to_mix);
+					continue;
+				}
+			}
+			{
+				long size_rendered;
+				DUMB_CLICK_REMOVER **cr = sigrenderer->click_remover;
+				dumb_silence(samples_to_filter[0], sigrenderer->n_channels * (size + 1));
+				sigrenderer->click_remover = NULL;
+				size_rendered = render_playing(sigrenderer, playing, volume, delta, note_delta, 0, size, samples_to_filter, 1, &left_to_mix);
+				sigrenderer->click_remover = cr;
+				it_filter(cr ? cr[0] : NULL, &playing->filter_state[0], samples[0 /*output*/], pos, samples_to_filter[0], size_rendered,
+					2, (int)(65536.0f/delta), playing->true_filter_cutoff, playing->true_filter_resonance);
+				it_filter(cr ? cr[1] : NULL, &playing->filter_state[1], samples[0 /*output*/]+1, pos, samples_to_filter[0]+1, size_rendered,
+					2, (int)(65536.0f/delta), playing->true_filter_cutoff, playing->true_filter_resonance);
+			}
+		} else {
+			it_reset_filter_state(&playing->filter_state[0]);
+			it_reset_filter_state(&playing->filter_state[1]);
+			render_playing(sigrenderer, playing, volume, delta, note_delta, pos, size, samples /*&samples[output]*/, 0, &left_to_mix);
+		}
+	}
+
+	/* Pass 2: surround voices, rendered as a single channel into samples[1]
+	 * using the third click remover. */
+	sigrenderer->n_channels = 1;
+	sigrenderer->click_remover = saved_cr ? saved_cr + 2 : 0;
+
+	for (i = 0; i < n_to_mix_surround; i++) {
+		IT_PLAYING *playing = to_mix_surround[i].playing;
+		double note_delta = delta * playing->delta;
+		int cutoff = playing->filter_cutoff << IT_ENVELOPE_SHIFT;
+		//int output = min( playing->output, max_output );
+
+		apply_pitch_modifications(sigrenderer->sigdata, playing, &note_delta, &cutoff);
+
+		if (cutoff != 127 << IT_ENVELOPE_SHIFT || playing->filter_resonance != 0) {
+			playing->true_filter_cutoff = cutoff;
+			playing->true_filter_resonance = playing->filter_resonance;
+		}
+
+		if (volume && (playing->true_filter_cutoff != 127 << IT_ENVELOPE_SHIFT || playing->true_filter_resonance != 0)) {
+			if (!samples_to_filter) {
+				samples_to_filter = allocate_sample_buffer(sigrenderer->n_channels, size + 1);
+				if (!samples_to_filter) {
+					render_playing(sigrenderer, playing, 0, delta, note_delta, pos, size, NULL, 0, &left_to_mix);
+					continue;
+				}
+			}
+			{
+				long size_rendered;
+				DUMB_CLICK_REMOVER **cr = sigrenderer->click_remover;
+				dumb_silence(samples_to_filter[0], size + 1);
+				sigrenderer->click_remover = NULL;
+				size_rendered = render_playing(sigrenderer, playing, volume, delta, note_delta, 0, size, samples_to_filter, 1, &left_to_mix);
+				sigrenderer->click_remover = cr;
+				it_filter(cr ? cr[0] : NULL, &playing->filter_state[0], samples[1 /*output*/], pos, samples_to_filter[0], size_rendered,
+					1, (int)(65536.0f/delta), playing->true_filter_cutoff, playing->true_filter_resonance);
+				// FIXME: filtering is not prevented by low left_to_mix!
+				// FIXME: change 'warning' to 'FIXME' everywhere
+			}
+		} else {
+			it_reset_filter_state(&playing->filter_state[0]);
+			it_reset_filter_state(&playing->filter_state[1]);
+			render_playing(sigrenderer, playing, volume, delta, note_delta, pos, size, &samples[1], 0, &left_to_mix);
+		}
+	}
+
+	/* Undo the temporary overrides before anything else looks at them. */
+	sigrenderer->n_channels = saved_channels;
+	sigrenderer->click_remover = saved_cr;
+
+	destroy_sample_buffer(samples_to_filter);
+
+	for (i = 0; i < DUMB_IT_N_CHANNELS; i++) {
+		if (sigrenderer->channel[i].playing) {
+			//if ((sigrenderer->channel[i].playing->flags & (IT_PLAYING_BACKGROUND | IT_PLAYING_DEAD)) == (IT_PLAYING_BACKGROUND | IT_PLAYING_DEAD)) {
+			// This change was made so Gxx would work correctly when a note faded out or whatever. Let's hope nothing else was broken by it.
+			if (sigrenderer->channel[i].playing->flags & IT_PLAYING_DEAD) {
+				free_playing(sigrenderer, sigrenderer->channel[i].playing);
+				sigrenderer->channel[i].playing = NULL;
+			}
+		}
+	}
+
+	for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+		if (sigrenderer->playing[i]) {
+			if (sigrenderer->playing[i]->flags & IT_PLAYING_DEAD) {
+				free_playing(sigrenderer, sigrenderer->playing[i]);
+				sigrenderer->playing[i] = NULL;
+			}
+		}
+	}
+}
+
+
+
+/* Dispatches to the mixer matching the renderer's channel count: 1 or 2
+ * channels use render_normal(), 3 channels use render_surround(). Does
+ * nothing for a zero 'size' or any other channel count.
+ */
+static void render(DUMB_IT_SIGRENDERER *sigrenderer, double volume, double delta, int32 pos, int32 size, sample_t **samples)
+{
+	if (size == 0)
+		return;
+
+	switch (sigrenderer->n_channels) {
+	case 1:
+	case 2:
+		render_normal(sigrenderer, volume, delta, pos, size, samples);
+		break;
+	case 3:
+		render_surround(sigrenderer, volume, delta, pos, size, samples);
+		break;
+	default:
+		break;
+	}
+}
+
+
+
+/* Allocates and initialises a renderer for 'sigdata', starting playback at
+ * order 'startorder'.
+ *
+ * 'callbacks' and 'cr' (the click-remover array) are stored in the new
+ * renderer. On every failure path they are released here, so the caller
+ * must not free them after a NULL return.
+ *
+ * Returns NULL when n_channels is not 2 (mono mixers are disabled), when
+ * startorder is past the order list, when allocation fails, or when the
+ * order list contains no playable pattern.
+ */
+static DUMB_IT_SIGRENDERER *init_sigrenderer(DUMB_IT_SIGDATA *sigdata, int n_channels, int startorder, IT_CALLBACKS *callbacks, DUMB_CLICK_REMOVER **cr)
+{
+	DUMB_IT_SIGRENDERER *sigrenderer;
+	int i;
+
+	/* [RH] Mono destination mixers are disabled.
+	 * Rejecting the request must still release the resources the caller
+	 * handed over, exactly like the other failure paths. */
+	if (n_channels != 2 || startorder > sigdata->n_orders) {
+		free(callbacks);
+		dumb_destroy_click_remover_array(n_channels, cr);
+		return NULL;
+	}
+
+	sigrenderer = malloc(sizeof(*sigrenderer));
+	if (!sigrenderer) {
+		free(callbacks);
+		dumb_destroy_click_remover_array(n_channels, cr);
+		return NULL;
+	}
+
+	sigrenderer->free_playing = NULL;
+	sigrenderer->callbacks = callbacks;
+	sigrenderer->click_remover = cr;
+
+	sigrenderer->sigdata = sigdata;
+	sigrenderer->n_channels = n_channels;
+	sigrenderer->resampling_quality = dumb_resampling_quality;
+	sigrenderer->ramp_style = DUMB_IT_RAMP_FULL;
+	sigrenderer->globalvolume = sigdata->global_volume;
+	sigrenderer->tempo = sigdata->tempo;
+
+	for (i = 0; i < DUMB_IT_N_CHANNELS; i++) {
+		IT_CHANNEL *channel = &sigrenderer->channel[i];
+		/* The top bit of channel_pan is shifted straight into the flags
+		 * word, which only works if the muted flag is bit 0. */
+#if IT_CHANNEL_MUTED != 1
+#error this is wrong
+#endif
+		channel->flags = sigdata->channel_pan[i] >> 7;
+		channel->volume = (sigdata->flags & IT_WAS_AN_XM) ? 0 : 64;
+		channel->pan = sigdata->channel_pan[i] & 0x7F;
+		channel->truepan = channel->pan << IT_ENVELOPE_SHIFT;
+		channel->channelvolume = sigdata->channel_volume[i];
+		channel->instrument = 0;
+		channel->sample = 0;
+		channel->note = IT_NOTE_OFF;
+		channel->SFmacro = 0;
+		channel->filter_cutoff = 127;
+		channel->filter_resonance = 0;
+		channel->new_note_action = 0xFF;
+		channel->xm_retrig = 0;
+		channel->retrig_tick = 0;
+		channel->tremor_time = 0;
+		channel->vibrato_waveform = 0;
+		channel->tremolo_waveform = 0;
+		channel->panbrello_waveform = 0;
+		channel->glissando = 0;
+		channel->toneslide = 0;
+		channel->ptm_toneslide = 0;
+		channel->ptm_last_toneslide = 0;
+		channel->okt_toneslide = 0;
+		channel->midi_state = 0;
+		channel->lastvolslide = 0;
+		channel->lastDKL = 0;
+		channel->lastEF = 0;
+		channel->lastG = 0;
+		channel->lastHspeed = 0;
+		channel->lastHdepth = 0;
+		channel->lastRspeed = 0;
+		channel->lastRdepth = 0;
+		channel->lastYspeed = 0;
+		channel->lastYdepth = 0;
+		channel->lastI = 0;
+		channel->lastJ = 0;
+		channel->lastN = 0;
+		channel->lastO = 0;
+		channel->high_offset = 0;
+		channel->lastP = 0;
+		channel->lastQ = 0;
+		channel->lastS = 0;
+		channel->pat_loop_row = 0;
+		channel->pat_loop_count = 0;
+		channel->pat_loop_end_row = 0;
+		channel->lastW = 0;
+		channel->xm_lastE1 = 0;
+		channel->xm_lastE2 = 0;
+		channel->xm_lastEA = 0;
+		channel->xm_lastEB = 0;
+		channel->xm_lastX1 = 0;
+		channel->xm_lastX2 = 0;
+		channel->inv_loop_delay = 0;
+		channel->inv_loop_speed = 0;
+		channel->inv_loop_offset = 0;
+		channel->playing = NULL;
+#ifdef BIT_ARRAY_BULLSHIT
+		channel->played_patjump = NULL;
+		channel->played_patjump_order = 0xFFFE;
+#endif
+		//channel->output = 0;
+	}
+
+	if (sigdata->flags & IT_WAS_A_669)
+		reset_effects(sigrenderer);
+
+	for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++)
+		sigrenderer->playing[i] = NULL;
+
+	sigrenderer->speed = sigdata->speed;
+
+	sigrenderer->processrow = 0xFFFE;
+	sigrenderer->n_rows = 0;
+	sigrenderer->breakrow = 0;
+	sigrenderer->rowcount = 1;
+	sigrenderer->order = startorder;
+
+	/* Starting part-way through the song makes that order the loop target. */
+	if (startorder > 0) {
+		sigrenderer->restart_position = startorder;
+	} else {
+		sigrenderer->restart_position = sigdata->restart_position;
+	}
+
+	sigrenderer->row = 0;
+	sigrenderer->processorder = startorder - 1;
+	sigrenderer->tick = 1;
+
+#ifdef BIT_ARRAY_BULLSHIT
+	sigrenderer->played = bit_array_create(sigdata->n_orders * 256);
+#endif
+
+	{
+		int order;
+		for (order = 0; order < sigdata->n_orders; order++) {
+			int n = sigdata->order[order];
+			if (n < sigdata->n_patterns) goto found_valid_order;
+#ifdef INVALID_ORDERS_END_SONG
+			if (n != IT_ORDER_SKIP)
+#else
+			if (n == IT_ORDER_END)
+#endif
+				break;
+
+#ifdef BIT_ARRAY_BULLSHIT
+			/* Fix for played order detection for songs which have skips at the start of the orders list */
+			for (n = 0; n < 256; n++) {
+				bit_array_set(sigrenderer->played, order * 256 + n);
+			}
+#endif
+		}
+		/* If we get here, there were no valid orders in the song. */
+		_dumb_it_end_sigrenderer(sigrenderer);
+		return NULL;
+	}
+	found_valid_order:
+
+	sigrenderer->time_left = 0;
+	sigrenderer->sub_time_left = 0;
+
+	/* The 'played' bit array created above is kept as is: creating it a
+	 * second time here would leak the first array and throw away the marks
+	 * just set for leading skip orders. */
+
+	sigrenderer->gvz_time = 0;
+	sigrenderer->gvz_sub_time = 0;
+
+	//sigrenderer->max_output = 0;
+
+	/* One-time preprocessing of the module data, shared by all renderers. */
+	if ( !(sigdata->flags & IT_WAS_PROCESSED) ) {
+		dumb_it_add_lpc( sigdata );
+
+		sigdata->flags |= IT_WAS_PROCESSED;
+	}
+
+	return sigrenderer;
+}
+
+
+/* Sets the resampling quality of the renderer and of every voice it is
+ * currently holding (pattern channels and NNA slots). Ignored when
+ * 'sigrenderer' is NULL or 'quality' is outside 0..DUMB_RQ_N_LEVELS-1.
+ */
+void DUMBEXPORT dumb_it_set_resampling_quality(DUMB_IT_SIGRENDERER * sigrenderer, int quality)
+{
+	int i;
+
+	if (!sigrenderer || quality < 0 || quality >= DUMB_RQ_N_LEVELS)
+		return;
+
+	sigrenderer->resampling_quality = quality;
+
+	for (i = 0; i < DUMB_IT_N_CHANNELS; i++) {
+		IT_PLAYING *voice = sigrenderer->channel[i].playing;
+
+		if (!voice)
+			continue;
+
+		voice->resampling_quality = quality;
+		voice->resampler.quality = quality;
+		resampler_set_quality(voice->resampler.fir_resampler[0], quality - DUMB_RESAMPLER_BASE);
+		resampler_set_quality(voice->resampler.fir_resampler[1], quality - DUMB_RESAMPLER_BASE);
+	}
+
+	for (i = 0; i < DUMB_IT_N_NNA_CHANNELS; i++) {
+		IT_PLAYING *voice = sigrenderer->playing[i];
+
+		if (!voice)
+			continue;
+
+		voice->resampling_quality = quality;
+		voice->resampler.quality = quality;
+		resampler_set_quality(voice->resampler.fir_resampler[0], quality - DUMB_RESAMPLER_BASE);
+		resampler_set_quality(voice->resampler.fir_resampler[1], quality - DUMB_RESAMPLER_BASE);
+	}
+}
+
+
+/* Stores 'ramp_style' in the renderer. Ignored when 'sigrenderer' is NULL
+ * or the value is outside 0..2.
+ */
+void DUMBEXPORT dumb_it_set_ramp_style(DUMB_IT_SIGRENDERER * sigrenderer, int ramp_style) {
+	if (!sigrenderer)
+		return;
+	if (ramp_style < 0 || ramp_style > 2)
+		return;
+
+	sigrenderer->ramp_style = ramp_style;
+}
+
+
+/* Installs the loop callback and the user pointer passed to it.
+ * Does nothing when 'sigrenderer' is NULL.
+ */
+void DUMBEXPORT dumb_it_set_loop_callback(DUMB_IT_SIGRENDERER *sigrenderer, int (DUMBCALLBACK *callback)(void *data), void *data)
+{
+	IT_CALLBACKS *cb;
+
+	if (!sigrenderer)
+		return;
+
+	cb = sigrenderer->callbacks;
+	cb->loop = callback;
+	cb->loop_data = data;
+}
+
+
+
+/* Installs the xm_speed_zero callback and the user pointer passed to it.
+ * Does nothing when 'sigrenderer' is NULL.
+ */
+void DUMBEXPORT dumb_it_set_xm_speed_zero_callback(DUMB_IT_SIGRENDERER *sigrenderer, int (DUMBCALLBACK *callback)(void *data), void *data)
+{
+	IT_CALLBACKS *cb;
+
+	if (!sigrenderer)
+		return;
+
+	cb = sigrenderer->callbacks;
+	cb->xm_speed_zero = callback;
+	cb->xm_speed_zero_data = data;
+}
+
+
+
+/* Installs the MIDI callback (called with the user pointer, a channel and a
+ * MIDI byte) together with its user pointer.
+ * Does nothing when 'sigrenderer' is NULL.
+ */
+void DUMBEXPORT dumb_it_set_midi_callback(DUMB_IT_SIGRENDERER *sigrenderer, int (DUMBCALLBACK *callback)(void *data, int channel, unsigned char midi_byte), void *data)
+{
+	IT_CALLBACKS *cb;
+
+	if (!sigrenderer)
+		return;
+
+	cb = sigrenderer->callbacks;
+	cb->midi = callback;
+	cb->midi_data = data;
+}
+
+
+
+/* Installs the global_volume_zero callback and the user pointer passed to it.
+ * Does nothing when 'sigrenderer' is NULL.
+ */
+void DUMBEXPORT dumb_it_set_global_volume_zero_callback(DUMB_IT_SIGRENDERER *sigrenderer, int (DUMBCALLBACK *callback)(void *data), void *data)
+{
+	IT_CALLBACKS *cb;
+
+	if (!sigrenderer)
+		return;
+
+	cb = sigrenderer->callbacks;
+	cb->global_volume_zero = callback;
+	cb->global_volume_zero_data = data;
+}
+
+
+
+/* Allocates a callback table with no callbacks installed.
+ * Returns NULL if the allocation fails; the caller owns the result.
+ */
+static IT_CALLBACKS *create_callbacks(void)
+{
+	IT_CALLBACKS *callbacks = malloc(sizeof(*callbacks));
+	if (!callbacks) return NULL;
+
+	/* Clear the user pointers as well as the function pointers so that no
+	 * member of the table is left holding an indeterminate value. */
+	callbacks->loop = NULL;
+	callbacks->loop_data = NULL;
+	callbacks->xm_speed_zero = NULL;
+	callbacks->xm_speed_zero_data = NULL;
+	callbacks->midi = NULL;
+	callbacks->midi_data = NULL;
+	callbacks->global_volume_zero = NULL;
+	callbacks->global_volume_zero_data = NULL;
+	return callbacks;
+}
+
+
+
+/* Creates a renderer for 'sigdata' with a fresh callback table and a fresh
+ * click-remover array, both of which are handed to init_sigrenderer().
+ * Returns NULL when 'sigdata' is NULL, when the callback table cannot be
+ * allocated, or when init_sigrenderer() fails.
+ */
+static DUMB_IT_SIGRENDERER *dumb_it_init_sigrenderer(DUMB_IT_SIGDATA *sigdata, int n_channels, int startorder)
+{
+	IT_CALLBACKS *cb = sigdata ? create_callbacks() : NULL;
+
+	if (!cb)
+		return NULL;
+
+	return init_sigrenderer(sigdata, n_channels, startorder, cb,
+		dumb_create_click_remover_array(n_channels));
+}
+
+
+
+/* Builds an IT renderer for 'duh' that starts at order 'startorder' and
+ * wraps it with duh_encapsulate_it_sigrenderer() (position argument 0).
+ */
+DUH_SIGRENDERER *DUMBEXPORT dumb_it_start_at_order(DUH *duh, int n_channels, int startorder)
+{
+	DUMB_IT_SIGRENDERER *renderer =
+		dumb_it_init_sigrenderer(duh_get_it_sigdata(duh), n_channels, startorder);
+
+	return duh_encapsulate_it_sigrenderer(renderer, n_channels, 0);
+}
+
+
+
+/* Creates a renderer positioned 'pos' time units into the song.
+ *
+ * If the module has checkpoints, the renderer is duplicated from the last
+ * checkpoint that is not followed by one starting before 'pos'; otherwise a
+ * fresh renderer is created at order 0. The remaining distance is then
+ * covered by rendering silently (volume 0, NULL buffer) and processing ticks.
+ * Returns NULL on allocation failure or if the song ends before 'pos'.
+ */
+static sigrenderer_t *it_start_sigrenderer(DUH *duh, sigdata_t *vsigdata, int n_channels, int32 pos)
+{
+	DUMB_IT_SIGDATA *sigdata = vsigdata;
+	DUMB_IT_SIGRENDERER *sigrenderer;
+	IT_CALLBACKS *callbacks;
+
+	(void)duh;
+
+	callbacks = create_callbacks();
+	if (!callbacks)
+		return NULL;
+
+	if (sigdata->checkpoint) {
+		IT_CHECKPOINT *cp;
+
+		for (cp = sigdata->checkpoint; cp->next && cp->next->time < pos; cp = cp->next)
+			;
+
+		sigrenderer = dup_sigrenderer(cp->sigrenderer, n_channels, callbacks);
+		if (!sigrenderer)
+			return NULL;
+		sigrenderer->click_remover = dumb_create_click_remover_array(n_channels);
+		pos -= cp->time;
+	} else {
+		sigrenderer = init_sigrenderer(sigdata, n_channels, 0, callbacks,
+			dumb_create_click_remover_array(n_channels));
+		if (!sigrenderer)
+			return NULL;
+	}
+
+	/* Consume whole ticks for as long as the target lies at or beyond the
+	 * end of the current one. */
+	for (;;) {
+		if (pos <= 0 || pos < sigrenderer->time_left)
+			break;
+
+		render(sigrenderer, 0, 1.0f, 0, sigrenderer->time_left, NULL);
+
+		pos -= sigrenderer->time_left;
+		sigrenderer->time_left = 0;
+
+		if (process_tick(sigrenderer)) {
+			_dumb_it_end_sigrenderer(sigrenderer);
+			return NULL;
+		}
+	}
+
+	/* Then the remaining part of the tick that contains the target. */
+	render(sigrenderer, 0, 1.0f, 0, pos, NULL);
+	sigrenderer->time_left -= pos;
+
+	return sigrenderer;
+}
+
+
+
+/* Renders up to 'size' output samples into 'samples', processing ticks as
+ * they elapse.
+ *
+ * 'delta' is the playback step per output sample; it is converted to 16.16
+ * fixed point (dt) for the time bookkeeping kept in time_left (integer part)
+ * and sub_time_left (low 16 bits). A NULL 'samples' renders silently.
+ * Returns the number of samples produced, which is less than 'size' when
+ * process_tick() reports the end of the song; the renderer's order is then
+ * set to -1 and later calls return 0.
+ */
+static int32 it_sigrenderer_get_samples(
+	sigrenderer_t *vsigrenderer,
+	double volume, double delta,
+	int32 size, sample_t **samples
+)
+{
+	DUMB_IT_SIGRENDERER *sigrenderer = vsigrenderer;
+	int32 pos;
+	int dt;
+	int32 todo;
+	LONG_LONG t;
+
+	if (sigrenderer->order < 0) return 0; // problematic
+
+	pos = 0;
+	/* NOTE(review): dt rounds to 0 for very small 'delta', which would make
+	 * the division below divide by zero - confirm callers never pass that. */
+	dt = (int)(delta * 65536.0f + 0.5f);
+
+	/* When samples is finally used in render_playing(), it won't be used if
+	 * volume is 0.
+	 */
+	if (!samples) volume = 0;
+
+	for (;;) {
+		/* Output samples that still fit into the current tick. */
+		todo = (long)((((LONG_LONG)sigrenderer->time_left << 16) | sigrenderer->sub_time_left) / dt);
+
+		if (todo >= size)
+			break;
+
+		render(sigrenderer, volume, delta, pos, todo, samples);
+
+		pos += todo;
+		size -= todo;
+
+		/* Subtract the consumed time, carrying between the two fields. */
+		t = sigrenderer->sub_time_left - (LONG_LONG)todo * dt;
+		sigrenderer->sub_time_left = (int32)t & 65535;
+		sigrenderer->time_left += (int32)(t >> 16);
+
+		if (process_tick(sigrenderer)) {
+			sigrenderer->order = -1;
+			sigrenderer->row = -1;
+			return pos;
+		}
+	}
+
+	/* The rest of the request ends inside the current tick. */
+	render(sigrenderer, volume, delta, pos, size, samples);
+
+	pos += size;
+
+	t = sigrenderer->sub_time_left - (LONG_LONG)size * dt;
+	sigrenderer->sub_time_left = (int32)t & 65535;
+	sigrenderer->time_left += (int32)(t >> 16);
+
+	if (samples)
+		dumb_remove_clicks_array(sigrenderer->n_channels, sigrenderer->click_remover, samples, pos, 512.0f / delta);
+
+	return pos;
+}
+
+
+
+static void it_sigrenderer_get_current_sample(sigrenderer_t *vsigrenderer, double volume, sample_t *samples)
+{
+	DUMB_IT_SIGRENDERER *it_sr = vsigrenderer; /* the generic handle is an IT renderer here */
+	(void)volume; /* currently unused; whether it will ever be needed is an open question */
+	dumb_click_remover_get_offset_array(it_sr->n_channels, it_sr->click_remover, samples);
+}
+
+
+
+void _dumb_it_end_sigrenderer(sigrenderer_t *vsigrenderer)
+{
+	/* Releases an IT renderer together with everything it owns; NULL is a no-op. */
+	DUMB_IT_SIGRENDERER *sr = vsigrenderer;
+	IT_PLAYING *node;
+	int idx;
+
+	if (!sr)
+		return;
+
+	/* Playing structures attached to the regular channels. */
+	for (idx = 0; idx < DUMB_IT_N_CHANNELS; idx++) {
+		if (sr->channel[idx].playing)
+			free_playing_orig(sr->channel[idx].playing);
+#ifdef BIT_ARRAY_BULLSHIT
+		bit_array_destroy(sr->channel[idx].played_patjump);
+#endif
+	}
+
+	/* Playing structures held in the NNA array. */
+	for (idx = 0; idx < DUMB_IT_N_NNA_CHANNELS; idx++) {
+		if (sr->playing[idx])
+			free_playing_orig(sr->playing[idx]);
+	}
+
+	/* Drain the free list, reading each link before its node is released. */
+	node = sr->free_playing;
+	while (node != NULL) {
+		IT_PLAYING *following = node->next;
+		free_playing_orig(node);
+		node = following;
+	}
+	dumb_destroy_click_remover_array(sr->n_channels, sr->click_remover);
+	free(sr->callbacks); /* free(NULL) does nothing, so no guard is required */
+#ifdef BIT_ARRAY_BULLSHIT
+	bit_array_destroy(sr->played);
+#endif
+	free(vsigrenderer);
+}
+
+
+
+DUH_SIGTYPE_DESC _dumb_sigtype_it = { /* descriptor tying the IT routines of this file to SIGTYPE_IT */
+	SIGTYPE_IT,
+	NULL, /* slot left unset for IT; its meaning is defined by DUH_SIGTYPE_DESC (declared elsewhere) */
+	&it_start_sigrenderer,
+	NULL, /* likewise left unset */
+	&it_sigrenderer_get_samples,
+	&it_sigrenderer_get_current_sample,
+	&_dumb_it_end_sigrenderer,
+	&_dumb_it_unload_sigdata
+};
+
+/* Wraps an existing IT renderer in a DUH_SIGRENDERER, using the IT signal-type descriptor. */
+DUH_SIGRENDERER *DUMBEXPORT duh_encapsulate_it_sigrenderer(DUMB_IT_SIGRENDERER *it_sigrenderer, int n_channels, int32 pos)
+{
+	DUH_SIGRENDERER *wrapped = duh_encapsulate_raw_sigrenderer(it_sigrenderer, &_dumb_sigtype_it, n_channels, pos);
+	return wrapped;
+}
+
+/* Fetches the raw renderer behind 'sigrenderer', requesting the SIGTYPE_IT kind. */
+DUMB_IT_SIGRENDERER *DUMBEXPORT duh_get_it_sigrenderer(DUH_SIGRENDERER *sigrenderer)
+{
+	DUMB_IT_SIGRENDERER *it_sr = duh_get_raw_sigrenderer(sigrenderer, SIGTYPE_IT);
+	return it_sr;
+}
+
+/* Fills *state with a snapshot of what 'channel' is currently playing; state->sample
+ * is set to 0 (and nothing else is written) when sr is NULL or nothing live plays there.
+ * Values of 64 or more will access NNA channels here. */
+void DUMBEXPORT dumb_it_sr_get_channel_state(DUMB_IT_SIGRENDERER *sr, int channel, DUMB_IT_CHANNEL_STATE *state)
+{
+	IT_PLAYING *playing;
+	int t; /* temporary var for holding accurate pan and filter cutoff */
+	double delta;
+	ASSERT(channel < DUMB_IT_TOTAL_CHANNELS); /* NOTE(review): only the upper bound is checked; a negative channel is not rejected */
+	if (!sr) { state->sample = 0; return; }
+	if (channel >= DUMB_IT_N_CHANNELS) {
+		playing = sr->playing[channel - DUMB_IT_N_CHANNELS];
+		if (!playing) { state->sample = 0; return; }
+	} else {
+		playing = sr->channel[channel].playing;
+		if (!playing) { state->sample = 0; return; }
+	}
+
+	if (playing->flags & IT_PLAYING_DEAD) { state->sample = 0; return; } /* dead entries are reported like empty ones */
+
+	state->channel = (int)(playing->channel - sr->channel); /* index of the owning channel within sr->channel[] */
+	state->sample = playing->sampnum;
+	state->volume = calculate_volume(sr, playing, 1.0f);
+	/* pan is t plus 128 shifted down by IT_ENVELOPE_SHIFT; subpan keeps the low byte of t. */
+	t = apply_pan_envelope(playing);
+	state->pan = (unsigned char)((t + 128) >> IT_ENVELOPE_SHIFT);
+	state->subpan = (signed char)t;
+	/* delta and cutoff are scaled up, passed through apply_pitch_modifications(), then reported. */
+	delta = playing->delta * 65536.0f;
+	t = playing->filter_cutoff << IT_ENVELOPE_SHIFT;
+	apply_pitch_modifications(sr->sigdata, playing, &delta, &t);
+	state->freq = (int)delta;
+	if (t == 127 << IT_ENVELOPE_SHIFT && playing->filter_resonance == 0) { /* cutoff 127 with zero resonance: report the "true" filter fields instead */
+		state->filter_resonance = playing->true_filter_resonance;
+		t = playing->true_filter_cutoff;
+	} else
+		state->filter_resonance = playing->filter_resonance;
+	state->filter_cutoff = (unsigned char)(t >> 8);
+	state->filter_subcutoff = (unsigned char)t;
+}
+
+
+/* Callback that ignores 'data' and always returns 1. */
+int DUMBCALLBACK dumb_it_callback_terminate(void *data)
+{
+	(void)data;
+	return 1;
+}
+
+
+/* Callback that ignores all three of its arguments and always returns 1. */
+int DUMBCALLBACK dumb_it_callback_midi_block(void *data, int channel, unsigned char midi_byte)
+{
+	(void)data;
+	(void)channel;
+	(void)midi_byte;
+	return 1;
+}
+
+
+
+#define IT_CHECKPOINT_INTERVAL (30 * 65536) /* Half a minute, i.e. time is counted in 1/65536ths of a second */
+
+#define FUCKIT_THRESHOLD (120 * 60 * 65536) /* two hours? probably a pattern loop mess... */
+/* Rebuilds sigdata's checkpoint list, one checkpoint per IT_CHECKPOINT_INTERVAL, from 'startorder'.
+ * Returns the length of the module, up until it first loops; 0 if setup fails or FUCKIT_THRESHOLD is hit. */
+int32 DUMBEXPORT dumb_it_build_checkpoints(DUMB_IT_SIGDATA *sigdata, int startorder)
+{
+	IT_CHECKPOINT *checkpoint;
+	if (!sigdata) return 0;
+	checkpoint = sigdata->checkpoint;
+	while (checkpoint) { /* discard any previously built list */
+		IT_CHECKPOINT *next = checkpoint->next;
+		_dumb_it_end_sigrenderer(checkpoint->sigrenderer);
+		free(checkpoint);
+		checkpoint = next;
+	}
+	sigdata->checkpoint = NULL;
+	checkpoint = malloc(sizeof(*checkpoint)); /* first checkpoint, at time 0 */
+	if (!checkpoint) return 0;
+	checkpoint->time = 0;
+	checkpoint->sigrenderer = dumb_it_init_sigrenderer(sigdata, 0, startorder);
+	if (!checkpoint->sigrenderer) {
+		free(checkpoint);
+		return 0;
+	}
+	checkpoint->sigrenderer->callbacks->loop = &dumb_it_callback_terminate; /* all three events get the always-1 callback (presumably: stop playback -- confirm) */
+	checkpoint->sigrenderer->callbacks->xm_speed_zero = &dumb_it_callback_terminate;
+	checkpoint->sigrenderer->callbacks->global_volume_zero = &dumb_it_callback_terminate;
+	/* NOTE(review): sigdata->checkpoint was set to NULL above, so this block looks unreachable -- confirm. */
+	if (sigdata->checkpoint)
+	{
+		IT_CHECKPOINT *checkpoint = sigdata->checkpoint;
+		while (checkpoint) {
+			IT_CHECKPOINT *next = checkpoint->next;
+			_dumb_it_end_sigrenderer(checkpoint->sigrenderer);
+			free(checkpoint);
+			checkpoint = next;
+		}
+	}
+
+	sigdata->checkpoint = checkpoint;
+	/* Each pass copies the newest checkpoint's renderer and runs the copy one interval ahead. */
+	for (;;) {
+		int32 l;
+		DUMB_IT_SIGRENDERER *sigrenderer = dup_sigrenderer(checkpoint->sigrenderer, 0, checkpoint->sigrenderer->callbacks);
+		checkpoint->sigrenderer->callbacks = NULL; /* the pointer was handed to the copy above */
+		if (!sigrenderer) {
+			checkpoint->next = NULL;
+			return checkpoint->time;
+		}
+
+		l = it_sigrenderer_get_samples(sigrenderer, 0, 1.0f, IT_CHECKPOINT_INTERVAL, NULL);
+		if (l < IT_CHECKPOINT_INTERVAL) { /* short read: the run ended inside this interval */
+			_dumb_it_end_sigrenderer(sigrenderer);
+			checkpoint->next = NULL;
+			return checkpoint->time + l;
+		}
+
+		checkpoint->next = malloc(sizeof(*checkpoint->next));
+		if (!checkpoint->next) {
+			_dumb_it_end_sigrenderer(sigrenderer);
+			return checkpoint->time + IT_CHECKPOINT_INTERVAL;
+		}
+		/* The advanced copy becomes the next checkpoint, one interval later. */
+		checkpoint->next->time = checkpoint->time + IT_CHECKPOINT_INTERVAL;
+		checkpoint = checkpoint->next;
+		checkpoint->sigrenderer = sigrenderer;
+
+		if (checkpoint->time >= FUCKIT_THRESHOLD) { /* give up: the list stays, but the length is reported as 0 */
+			checkpoint->next = NULL;
+			return 0;
+		}
+	}
+}
+
+
+
+void DUMBEXPORT dumb_it_do_initial_runthrough(DUH *duh)
+{
+	/* Builds checkpoints from order 0 and stores the resulting length in the DUH. */
+	DUMB_IT_SIGDATA *it_data;
+
+	if (!duh) return;
+	it_data = duh_get_it_sigdata(duh);
+	if (it_data) duh_set_length(duh, dumb_it_build_checkpoints(it_data, 0));
+}
+/* Rates a pattern: 0 = has content that matters, 1 = silent, above 1 = empty, or silent with delay/loop/slow-speed effects. */
+static int is_pattern_silent(IT_PATTERN * pattern, int order) {
+	int ret = 1;
+	IT_ENTRY * entry, * end;
+	if (!pattern || !pattern->n_rows || !pattern->n_entries || !pattern->entry) return 2;
+	/* One entry per row, all of them end-of-row markers: the pattern is empty as well. */
+	if ( pattern->n_entries == pattern->n_rows ) {
+		int n;
+		entry = pattern->entry;
+		for ( n = 0; n < pattern->n_entries; ++n, ++entry ) {
+			if ( !IT_IS_END_ROW(entry) ) break;
+		}
+		if ( n == pattern->n_entries ) return 2;
+		// broken?
+	}
+
+	entry = pattern->entry;
+	end = entry + pattern->n_entries;
+	/* Scan every entry; the first one that could matter ends the scan with 0. */
+	while (entry < end) {
+		if (!IT_IS_END_ROW(entry)) {
+			if (entry->mask & (IT_ENTRY_INSTRUMENT | IT_ENTRY_VOLPAN))
+				return 0;
+			if (entry->mask & IT_ENTRY_NOTE && entry->note < 120) /* only note values below 120 count */
+				return 0;
+			if (entry->mask & IT_ENTRY_EFFECT) {
+				switch (entry->effect) {
+					case IT_SET_GLOBAL_VOLUME:
+						if (entry->effectvalue) return 0;
+						break;
+
+					case IT_SET_SPEED:
+						if (entry->effectvalue > 64) ret++;
+						break;
+
+					case IT_SET_SONG_TEMPO:
+					case IT_XM_KEY_OFF:
+						break;
+
+					case IT_JUMP_TO_ORDER:
+						if (entry->effectvalue != order) /* only a jump back to this same order is tolerated */
+							return 0;
+						break;
+
+					case IT_S:
+						switch (entry->effectvalue >> 4) {
+							case 0: // meh bastard
+								if ( entry->effectvalue != 0 ) return 0;
+								break;
+
+							case IT_S_FINE_PATTERN_DELAY:
+							case IT_S_PATTERN_LOOP:
+							case IT_S_PATTERN_DELAY:
+								ret++;
+								break;
+
+							case IT_S7:
+								if ((entry->effectvalue & 15) > 2)
+									return 0;
+								break;
+
+							default:
+								return 0;
+						}
+						break;
+
+					// clever idiot with his S L O W crap; do nothing
+					case IT_VOLSLIDE_TONEPORTA:
+					case IT_SET_SAMPLE_OFFSET:
+					case IT_GLOBAL_VOLUME_SLIDE:
+						if ( entry->effectvalue != 0 ) return 0;
+						break;
+
+					// genius also uses this instead of jump to order by mistake, meh, and it's bloody BCD
+					case IT_BREAK_TO_ROW:						
+						if ( ( ( entry->effectvalue >> 4 ) * 10 + ( entry->effectvalue & 15 ) ) != order ) return 0;
+						break;
+
+					default:
+						return 0;
+				}
+			}
+		}
+		entry++;
+	}
+
+	return ret;
+}
+
+/* Empties one pattern in place: a single row, no entries, entry storage released. */
+static void it_blank_out_pattern(IT_PATTERN *pattern)
+{
+	pattern->n_rows = 1;
+	pattern->n_entries = 0;
+	free(pattern->entry); /* free(NULL) is harmless, so no guard is needed */
+	pattern->entry = NULL;
+}
+
+/* Blanks out the patterns at the start and at the end of the order list that
+ * is_pattern_silent() rates above 1, stopping in each direction at the first
+ * pattern rated 0 or 1. Order entries that name no pattern are skipped.
+ *
+ * Returns 0 when both scans stopped at such a pattern, or -1 when duh or its
+ * IT data is missing or a scan ran off the end of the list.
+ */
+int DUMBEXPORT dumb_it_trim_silent_patterns(DUH * duh) {
+	DUMB_IT_SIGDATA *it_data;
+	int idx;
+
+	if (!duh) return -1;
+
+	it_data = duh_get_it_sigdata(duh);
+	if (!it_data || !it_data->order || !it_data->pattern) return -1;
+
+	/* Forward pass over the leading orders. */
+	for (idx = 0; idx < it_data->n_orders; idx++) {
+		int pat = it_data->order[idx];
+		/* An order entry beyond the pattern table is stepped over. */
+		if (pat >= it_data->n_patterns) continue;
+		if (is_pattern_silent(&it_data->pattern[pat], idx) <= 1) break;
+		it_blank_out_pattern(&it_data->pattern[pat]);
+	}
+
+	/* The scan ran past the last order without stopping. */
+	if (idx == it_data->n_orders) return -1;
+
+	/* Backward pass over the trailing orders. */
+	for (idx = it_data->n_orders - 1; idx >= 0; idx--) {
+		int pat = it_data->order[idx];
+		/* An order entry beyond the pattern table is stepped over. */
+		if (pat >= it_data->n_patterns) continue;
+		if (is_pattern_silent(&it_data->pattern[pat], idx) <= 1) break;
+		it_blank_out_pattern(&it_data->pattern[pat]);
+	}
+
+	if (idx < 0) return -1;
+
+	/*duh->length = dumb_it_build_checkpoints(sigdata, 0);*/
+
+	return 0;
+}
+
+/* Renders the song silently from each order not yet marked as played and reports
+ * that order and its run length to 'callback'. Returns 0 on success, or -1 on
+ * bad input, failed allocation, or a negative return from the callback. */
+int DUMBEXPORT dumb_it_scan_for_playable_orders(DUMB_IT_SIGDATA *sigdata, dumb_scan_callback callback, void * callback_data)
+{
+	int n;
+	int32 length;
+	void * ba_played;
+	DUMB_IT_SIGRENDERER * sigrenderer;
+
+	if (!sigdata || !sigdata->n_orders || !sigdata->order) return -1;
+
+	ba_played = bit_array_create(sigdata->n_orders * 256);
+	if (!ba_played) return -1;
+
+	/* Skip the first order, it should always be played */
+	for (n = 1; n < sigdata->n_orders; n++) {
+		/* Orders with no valid pattern, or a pattern rated above 1, are marked up front. */
+		if ((sigdata->order[n] >= sigdata->n_patterns) ||
+			(is_pattern_silent(&sigdata->pattern[sigdata->order[n]], n) > 1))
+			bit_array_set(ba_played, n * 256);
+	}
+
+	for (;;) {
+		/* Find the first order whose 256-bit range still tests as clear. */
+		for (n = 0; n < sigdata->n_orders; n++) {
+			if (!bit_array_test_range(ba_played, n * 256, 256)) break;
+		}
+		if (n == sigdata->n_orders) break;
+
+		sigrenderer = dumb_it_init_sigrenderer(sigdata, 0, n);
+		if (!sigrenderer) {
+			bit_array_destroy(ba_played);
+			return -1;
+		}
+		sigrenderer->callbacks->loop = &dumb_it_callback_terminate;
+		sigrenderer->callbacks->xm_speed_zero = &dumb_it_callback_terminate;
+		sigrenderer->callbacks->global_volume_zero = &dumb_it_callback_terminate;
+
+		/* Run silently, one interval at a time, until a short read or the length cap. */
+		length = 0;
+		for (;;) {
+			int32 l = it_sigrenderer_get_samples(sigrenderer, 0, 1.0f, IT_CHECKPOINT_INTERVAL, NULL);
+			length += l;
+			if (l < IT_CHECKPOINT_INTERVAL || length >= FUCKIT_THRESHOLD) break;
+		}
+
+		if ((*callback)(callback_data, n, length) < 0) {
+			/* Aborted by the caller: release the renderer and the bit array before leaving. */
+			_dumb_it_end_sigrenderer(sigrenderer);
+			bit_array_destroy(ba_played);
+			return -1;
+		}
+		bit_array_merge(ba_played, sigrenderer->played, 0);
+		_dumb_it_end_sigrenderer(sigrenderer);
+	}
+	bit_array_destroy(ba_played);
+	return 0;
+}
diff --git a/libraries/dumb/src/it/itunload.c b/libraries/dumb/src/it/itunload.c
new file mode 100644
index 000000000..efed192a6
--- /dev/null
+++ b/libraries/dumb/src/it/itunload.c
@@ -0,0 +1,72 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * itunload.c - Code to free an Impulse Tracker       / / \  \
+ *              module from memory.                  | <  /   \_
+ *                                                   |  \/ /\   /
+ * By entheh.                                         \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* Releases an IT module: its message, order list, instruments, MIDI block,
+ * samples (and their data), patterns (and their entries), every checkpoint
+ * with its renderer, and finally the structure itself. NULL does nothing.
+ */
+void _dumb_it_unload_sigdata(sigdata_t *vsigdata)
+{
+	DUMB_IT_SIGDATA *it_data = vsigdata;
+	IT_CHECKPOINT *cp;
+	int idx;
+
+	if (!it_data)
+		return;
+
+	/* free(NULL) does nothing, so the flat buffers need no individual guards. */
+	free(it_data->song_message);
+	free(it_data->order);
+	free(it_data->instrument);
+	free(it_data->midi);
+
+	/* Per-sample data first, then the sample table itself. */
+	if (it_data->sample) {
+		for (idx = 0; idx < it_data->n_samples; idx++) {
+			free(it_data->sample[idx].data);
+		}
+		free(it_data->sample);
+	}
+
+	/* Likewise for the per-pattern entry arrays. */
+	if (it_data->pattern) {
+		for (idx = 0; idx < it_data->n_patterns; idx++) {
+			free(it_data->pattern[idx].entry);
+		}
+		free(it_data->pattern);
+	}
+
+	/* Walk the checkpoint chain, saving each link before its node goes away. */
+	cp = it_data->checkpoint;
+	while (cp) {
+		IT_CHECKPOINT *following = cp->next;
+		_dumb_it_end_sigrenderer(cp->sigrenderer);
+		free(cp);
+		cp = following;
+	}
+
+	free(vsigdata);
+}
diff --git a/libraries/dumb/src/it/load669.c b/libraries/dumb/src/it/load669.c
new file mode 100644
index 000000000..38343be29
--- /dev/null
+++ b/libraries/dumb/src/it/load669.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * load669.c - Code to read a 669 Composer module     / / \  \
+ *             file, opening and closing it for      | <  /   \_
+ *             you.                                  |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller                                     | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_669_quick(): opens 'filename', parses it as a 669 module with
+ * dumb_read_669_quick() and closes the file again. Returns the resulting DUH,
+ * or NULL if the file cannot be opened. When you have finished with the DUH,
+ * pass it to unload_duh() so that the memory can be freed.
+ */
+DUH *DUMBEXPORT dumb_load_669_quick(const char *filename)
+{
+	DUMBFILE *file = dumbfile_open(filename);
+	DUH *loaded = NULL;
+
+	if (file) {
+		/* The reader's result is passed through unchanged. */
+		loaded = dumb_read_669_quick(file);
+		dumbfile_close(file);
+	}
+
+	return loaded;
+}
diff --git a/libraries/dumb/src/it/load6692.c b/libraries/dumb/src/it/load6692.c
new file mode 100644
index 000000000..1f41c7aa0
--- /dev/null
+++ b/libraries/dumb/src/it/load6692.c
@@ -0,0 +1,34 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * load6692.c - Code to read a 669 Composer module    / / \  \
+ *              file, opening and closing it for     | <  /   \_
+ *              you, and do an initial run-through.  |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller                                     | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_669(): loads a 669 file with dumb_load_669_quick(), hands the
+ * result to dumb_it_do_initial_runthrough() and returns it. When you have
+ * finished with the DUH, pass it to unload_duh() so the memory can be freed.
+ */
+DUH *DUMBEXPORT dumb_load_669(const char *filename)
+{
+	DUH *loaded = dumb_load_669_quick(filename);
+	dumb_it_do_initial_runthrough(loaded); /* called even when loading returned NULL */
+	return loaded;
+}
diff --git a/libraries/dumb/src/it/loadamf.c b/libraries/dumb/src/it/loadamf.c
new file mode 100644
index 000000000..2be50f7f5
--- /dev/null
+++ b/libraries/dumb/src/it/loadamf.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadamf.c - Code to read a DSMI AMF module file,   / / \  \
+ *             opening and closing it for you.       | <  /   \_
+ *                                                   |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_amf_quick(): opens 'filename', parses it as an AMF module with
+ * dumb_read_amf_quick() and closes the file again. Returns the resulting DUH,
+ * or NULL if the file cannot be opened. When you have finished with the DUH,
+ * pass it to unload_duh() so that the memory can be freed.
+ */
+DUH *DUMBEXPORT dumb_load_amf_quick(const char *filename)
+{
+	DUMBFILE *file = dumbfile_open(filename);
+	DUH *loaded = NULL;
+
+	if (file) {
+		/* The reader's result is passed through unchanged. */
+		loaded = dumb_read_amf_quick(file);
+		dumbfile_close(file);
+	}
+
+	return loaded;
+}
diff --git a/libraries/dumb/src/it/loadamf2.c b/libraries/dumb/src/it/loadamf2.c
new file mode 100644
index 000000000..83ed76810
--- /dev/null
+++ b/libraries/dumb/src/it/loadamf2.c
@@ -0,0 +1,34 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadamf2.c - Code to read a DSMI AMF module file,  / / \  \
+ *              opening and closing it for you, and  | <  /   \_
+ *              do an initial run-through.           |  \/ /\   /
+ *                                                    \_  /  > /
+ *                                                      | \ / /
+ * By Chris Moeller.                                    |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_amf(): loads an AMF file with dumb_load_amf_quick(), hands the
+ * result to dumb_it_do_initial_runthrough() and returns it. When you have
+ * finished with the DUH, pass it to unload_duh() so the memory can be freed.
+ */
+DUH *DUMBEXPORT dumb_load_amf(const char *filename)
+{
+	DUH *loaded = dumb_load_amf_quick(filename);
+	dumb_it_do_initial_runthrough(loaded); /* called even when loading returned NULL */
+	return loaded;
+}
diff --git a/libraries/dumb/src/it/loadany.c b/libraries/dumb/src/it/loadany.c
new file mode 100644
index 000000000..910e86a77
--- /dev/null
+++ b/libraries/dumb/src/it/loadany.c
@@ -0,0 +1,38 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadany.c - Code to detect and read any of the     / / \  \
+ *             module formats supported by DUMB,     | <  /   \_
+ *             opening and closing the file for you. |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+/* dumb_load_any_quick(): opens 'filename', passes the stream together with
+ * 'restrict_' and 'subsong' to dumb_read_any_quick(), closes the file and
+ * returns that result; NULL if the file cannot be opened. */
+DUH *DUMBEXPORT dumb_load_any_quick(const char *filename, int restrict_, int subsong)
+{
+	DUMBFILE *file = dumbfile_open(filename);
+	DUH *loaded = NULL;
+
+	if (file) {
+		/* The reader's result is passed through unchanged. */
+		loaded = dumb_read_any_quick(file, restrict_, subsong);
+		dumbfile_close(file);
+	}
+
+	return loaded;
+}
diff --git a/libraries/dumb/src/it/loadany2.c b/libraries/dumb/src/it/loadany2.c
new file mode 100644
index 000000000..71590a0bf
--- /dev/null
+++ b/libraries/dumb/src/it/loadany2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadany2.c - Code to detect and read any of the    / / \  \
+ *              module formats supported by DUMB,    | <  /   \_
+ *              opening and closing the file for     |  \/ /\   /
+ *              you, and do an initial run-through.   \_  /  > /
+ *                                                      | \ / /
+ * by Chris Moeller.                                    |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+/* dumb_load_any(): loads a module with dumb_load_any_quick(), hands the result
+ * to dumb_it_do_initial_runthrough() and returns it. */
+DUH *DUMBEXPORT dumb_load_any(const char *filename, int restrict_, int subsong)
+{
+	DUH *loaded = dumb_load_any_quick(filename, restrict_, subsong);
+	dumb_it_do_initial_runthrough(loaded); /* called even when loading returned NULL */
+	return loaded;
+}
diff --git a/libraries/dumb/src/it/loadasy.c b/libraries/dumb/src/it/loadasy.c
new file mode 100644
index 000000000..5e9b2dd1d
--- /dev/null
+++ b/libraries/dumb/src/it/loadasy.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadasy.c - Code to read an ASYLUM Music Format    / / \  \
+ *             module file, opening and closing it   | <  /   \_
+ *             for you.                              |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_asy_quick(): opens 'filename', parses it as an ASYLUM Music Format
+ * module with dumb_read_asy_quick() and closes the file again. Returns the
+ * resulting DUH, or NULL if the file cannot be opened. When you have finished
+ * with the DUH, pass it to unload_duh() so that the memory can be freed.
+ */
+DUH *DUMBEXPORT dumb_load_asy_quick(const char *filename)
+{
+	DUMBFILE *file = dumbfile_open(filename);
+	DUH *loaded = NULL;
+
+	if (file) {
+		/* The reader's result is passed through unchanged. */
+		loaded = dumb_read_asy_quick(file);
+		dumbfile_close(file);
+	}
+
+	return loaded;
+}
diff --git a/libraries/dumb/src/it/loadasy2.c b/libraries/dumb/src/it/loadasy2.c
new file mode 100644
index 000000000..ecbc1ecbd
--- /dev/null
+++ b/libraries/dumb/src/it/loadasy2.c
@@ -0,0 +1,34 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadasy2.c - Code to read an ASYLUM Music Format   / / \  \
+ *              module file, opening and closing it  | <  /   \_
+ *              for you, and do an initial run-      |  \/ /\   /
+ *              through.                              \_  /  > /
+ *                                                      | \ / /
+ * By Chris Moeller.                                    |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_asy(): loads an ASYLUM file with dumb_load_asy_quick(), hands the
+ * result to dumb_it_do_initial_runthrough() and returns it. When you have
+ * finished with the DUH, pass it to unload_duh() so the memory can be freed.
+ */
+DUH *DUMBEXPORT dumb_load_asy(const char *filename)
+{
+	DUH *loaded = dumb_load_asy_quick(filename);
+	dumb_it_do_initial_runthrough(loaded); /* called even when loading returned NULL */
+	return loaded;
+}
diff --git a/libraries/dumb/src/it/loadmod.c b/libraries/dumb/src/it/loadmod.c
new file mode 100644
index 000000000..c2239ccb2
--- /dev/null
+++ b/libraries/dumb/src/it/loadmod.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadmod.c - Code to read a good old-fashioned      / / \  \
+ *             Amiga module file, opening and        | <  /   \_
+ *             closing it for you.                   |  \/ /\   /
+ *                                                    \_  /  > /
+ * By entheh.                                           | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_mod_quick(): opens 'filename', parses it as a MOD file with
+ * dumb_read_mod_quick() (forwarding 'restrict_') and closes the file again.
+ * Returns the resulting DUH, or NULL if the file cannot be opened. When you
+ * have finished with it, pass it to unload_duh() so the memory can be freed.
+ */
+DUH *DUMBEXPORT dumb_load_mod_quick(const char *filename, int restrict_)
+{
+	DUMBFILE *file = dumbfile_open(filename);
+	DUH *loaded = NULL;
+
+	if (file) {
+		/* The reader's result is passed through unchanged. */
+		loaded = dumb_read_mod_quick(file, restrict_);
+		dumbfile_close(file);
+	}
+
+	return loaded;
+}
diff --git a/libraries/dumb/src/it/loadmod2.c b/libraries/dumb/src/it/loadmod2.c
new file mode 100644
index 000000000..1051f1a8d
--- /dev/null
+++ b/libraries/dumb/src/it/loadmod2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadmod2.c - Function to read a good old-          / / \  \
+ *              fashioned Amiga module file,         | <  /   \_
+ *              opening and closing it for you,      |  \/ /\   /
+ *              and do an initial run-through.        \_  /  > /
+ *                                                      | \ / /
+ * Split off from loadmod.c by entheh.                  |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+/* dumb_load_mod(): loads a MOD file with dumb_load_mod_quick(), hands the
+ * result to dumb_it_do_initial_runthrough() and returns it. */
+DUH *DUMBEXPORT dumb_load_mod(const char *filename, int restrict_)
+{
+	DUH *loaded = dumb_load_mod_quick(filename, restrict_);
+	dumb_it_do_initial_runthrough(loaded); /* called even when loading returned NULL */
+	return loaded;
+}
diff --git a/libraries/dumb/src/it/loadmtm.c b/libraries/dumb/src/it/loadmtm.c
new file mode 100644
index 000000000..5ce44249b
--- /dev/null
+++ b/libraries/dumb/src/it/loadmtm.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadmtm.c - Code to read a MultiTracker Module     / / \  \
+ *             file, opening and closing it for      | <  /   \_
+ *             you.                                  |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller                                     | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_mtm_quick(): opens the named file, reads it as an MTM module
+ * and closes it again. Returns NULL if the file cannot be opened, else
+ * whatever dumb_read_mtm_quick() gave; unload_duh() frees a loaded DUH.
+ */
+DUH *DUMBEXPORT dumb_load_mtm_quick(const char *filename)
+{
+	DUH *module = NULL;
+	DUMBFILE *stream = dumbfile_open(filename);
+
+	/* A failed open leaves nothing to read or close: return NULL. */
+	if (stream) {
+		module = dumb_read_mtm_quick(stream);
+		/* The stream is closed whatever the reader returned. */
+		dumbfile_close(stream);
+	}
+
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadmtm2.c b/libraries/dumb/src/it/loadmtm2.c
new file mode 100644
index 000000000..13d303203
--- /dev/null
+++ b/libraries/dumb/src/it/loadmtm2.c
@@ -0,0 +1,34 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadmtm2.c - Code to read a MultiTracker Module    / / \  \
+ *              file, opening and closing it for     | <  /   \_
+ *              you, and do an initial run-through.  |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller                                     | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_mtm(): quick-loads an MTM file with dumb_load_mtm_quick(), then
+ * hands the result, unchecked, to dumb_it_do_initial_runthrough(). Pass
+ * the returned DUH pointer to unload_duh() when you have finished with it.
+ */
+DUH *DUMBEXPORT dumb_load_mtm(const char *filename)
+{
+	DUH *const module = dumb_load_mtm_quick(filename);
+	dumb_it_do_initial_runthrough(module);
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadokt.c b/libraries/dumb/src/it/loadokt.c
new file mode 100644
index 000000000..b1c73b8af
--- /dev/null
+++ b/libraries/dumb/src/it/loadokt.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadokt.c - Code to read an Oktalyzer module       / / \  \
+ *             file, opening and closing it for      | <  /   \_
+ *             you.                                  |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_okt_quick(): opens the named file, reads it as an OKT module
+ * and closes it again. Returns NULL if the file cannot be opened, else
+ * whatever dumb_read_okt_quick() gave; unload_duh() frees a loaded DUH.
+ */
+DUH *DUMBEXPORT dumb_load_okt_quick(const char *filename)
+{
+	DUH *module = NULL;
+	DUMBFILE *stream = dumbfile_open(filename);
+
+	/* A failed open leaves nothing to read or close: return NULL. */
+	if (stream) {
+		module = dumb_read_okt_quick(stream);
+		/* The stream is closed whatever the reader returned. */
+		dumbfile_close(stream);
+	}
+
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadokt2.c b/libraries/dumb/src/it/loadokt2.c
new file mode 100644
index 000000000..f58da163a
--- /dev/null
+++ b/libraries/dumb/src/it/loadokt2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadokt2.c - Function to read an Oktalyzer         / / \  \
+ *              module file, opening and closing     | <  /   \_
+ *              it for you, and do an initial run-   |  \/ /\   /
+ *              through.                              \_  /  > /
+ *                                                      | \ / /
+ * By Chris Moeller.                                    |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+
+DUH *DUMBEXPORT dumb_load_okt(const char *filename)
+{
+	DUH *const module = dumb_load_okt_quick(filename);
+	dumb_it_do_initial_runthrough(module); /* unconditional, even for NULL */
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadoldpsm.c b/libraries/dumb/src/it/loadoldpsm.c
new file mode 100644
index 000000000..2460d871a
--- /dev/null
+++ b/libraries/dumb/src/it/loadoldpsm.c
@@ -0,0 +1,43 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadoldpsm.c - Code to read a ProTracker Studio    / / \  \
+ *                file, opening and closing it for   | <  /   \_
+ *                you.                               |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_old_psm_quick(): opens the named file, reads it as an old PSM
+ * module and closes it again. Returns NULL if the file cannot be opened,
+ * else whatever dumb_read_old_psm_quick() gave. Pass a loaded DUH to
+ * unload_duh() when you have finished with it, so the memory is freed.
+ */
+DUH *DUMBEXPORT dumb_load_old_psm_quick(const char *filename)
+{
+	DUH *module = NULL;
+	DUMBFILE *stream = dumbfile_open(filename);
+
+	/* A failed open leaves nothing to read or close: return NULL. */
+	if (stream) {
+		module = dumb_read_old_psm_quick(stream);
+		/* The stream is closed whatever the reader returned. */
+		dumbfile_close(stream);
+	}
+
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadoldpsm2.c b/libraries/dumb/src/it/loadoldpsm2.c
new file mode 100644
index 000000000..edd10db56
--- /dev/null
+++ b/libraries/dumb/src/it/loadoldpsm2.c
@@ -0,0 +1,34 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadoldpsm2.c - Code to read a ProTracker Studio   / / \  \
+ *                 file, opening and closing it for  | <  /   \_
+ *                 you, and do an initial run-       |  \/ /\   /
+ *                 through.                           \_  /  > /
+ *                                                      | \ / /
+ * By Chris Moeller.                                    |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_old_psm(): quick-loads an old PSM file with
+ * dumb_load_old_psm_quick(), then hands the result, unchecked, to
+ * dumb_it_do_initial_runthrough(). unload_duh() frees the returned DUH.
+ */
+DUH *DUMBEXPORT dumb_load_old_psm(const char *filename)
+{
+	DUH *const module = dumb_load_old_psm_quick(filename);
+	dumb_it_do_initial_runthrough(module);
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadpsm.c b/libraries/dumb/src/it/loadpsm.c
new file mode 100644
index 000000000..7e2405c61
--- /dev/null
+++ b/libraries/dumb/src/it/loadpsm.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadpsm.c - Code to read a ProTracker Studio       / / \  \
+ *             file, opening and closing it for      | <  /   \_
+ *             you.                                  |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_psm_quick(): opens the named file, hands it and subsong to
+ * dumb_read_psm_quick(), then closes it. Returns NULL if the file cannot
+ * be opened, else the reader's result; unload_duh() frees a loaded DUH.
+ */
+DUH *DUMBEXPORT dumb_load_psm_quick(const char *filename, int subsong)
+{
+	DUH *module = NULL;
+	DUMBFILE *stream = dumbfile_open(filename);
+
+	/* A failed open leaves nothing to read or close: return NULL. */
+	if (stream) {
+		module = dumb_read_psm_quick(stream, subsong);
+		/* The stream is closed whatever the reader returned. */
+		dumbfile_close(stream);
+	}
+
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadpsm2.c b/libraries/dumb/src/it/loadpsm2.c
new file mode 100644
index 000000000..c4b5132ff
--- /dev/null
+++ b/libraries/dumb/src/it/loadpsm2.c
@@ -0,0 +1,34 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadpsm2.c - Code to read a ProTracker Studio      / / \  \
+ *              file, opening and closing it for     | <  /   \_
+ *              you, and do an initial run-through.  |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_psm(): quick-loads a PSM file with dumb_load_psm_quick(), then
+ * hands the result, unchecked, to dumb_it_do_initial_runthrough(). Pass
+ * the returned DUH pointer to unload_duh() when you have finished with it.
+ */
+DUH *DUMBEXPORT dumb_load_psm(const char *filename, int subsong)
+{
+	DUH *const module = dumb_load_psm_quick(filename, subsong);
+	dumb_it_do_initial_runthrough(module);
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadptm.c b/libraries/dumb/src/it/loadptm.c
new file mode 100644
index 000000000..1ff066b45
--- /dev/null
+++ b/libraries/dumb/src/it/loadptm.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadptm.c - Code to read a Poly Tracker v2.03      / / \  \
+ *             file, opening and closing it for      | <  /   \_
+ *             you.                                  |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_ptm_quick(): opens the named file, reads it as a PTM module
+ * and closes it again. Returns NULL if the file cannot be opened, else
+ * whatever dumb_read_ptm_quick() gave; unload_duh() frees a loaded DUH.
+ */
+DUH *DUMBEXPORT dumb_load_ptm_quick(const char *filename)
+{
+	DUH *module = NULL;
+	DUMBFILE *stream = dumbfile_open(filename);
+
+	/* A failed open leaves nothing to read or close: return NULL. */
+	if (stream) {
+		module = dumb_read_ptm_quick(stream);
+		/* The stream is closed whatever the reader returned. */
+		dumbfile_close(stream);
+	}
+
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadptm2.c b/libraries/dumb/src/it/loadptm2.c
new file mode 100644
index 000000000..3e50735d0
--- /dev/null
+++ b/libraries/dumb/src/it/loadptm2.c
@@ -0,0 +1,34 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadptm2.c - Code to read a Poly Tracker v2.03     / / \  \
+ *              file, opening and closing it for     | <  /   \_
+ *              you, and do an initial run-through.  |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_ptm(): quick-loads a PTM file with dumb_load_ptm_quick(), then
+ * hands the result, unchecked, to dumb_it_do_initial_runthrough(). Pass
+ * the returned DUH pointer to unload_duh() when you have finished with it.
+ */
+DUH *DUMBEXPORT dumb_load_ptm(const char *filename)
+{
+	DUH *const module = dumb_load_ptm_quick(filename);
+	dumb_it_do_initial_runthrough(module);
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadriff.c b/libraries/dumb/src/it/loadriff.c
new file mode 100644
index 000000000..84a8a4358
--- /dev/null
+++ b/libraries/dumb/src/it/loadriff.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadriff.c - Code to read a RIFF module file       / / \  \
+ *              opening and closing it for you.      | <  /   \_
+ *                                                   |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_riff_quick(): opens the named file, reads it as a RIFF module
+ * and closes it again. Returns NULL if the file cannot be opened, else
+ * whatever dumb_read_riff_quick() gave; unload_duh() frees a loaded DUH.
+ */
+DUH *DUMBEXPORT dumb_load_riff_quick(const char *filename)
+{
+	DUH *module = NULL;
+	DUMBFILE *stream = dumbfile_open(filename);
+
+	/* A failed open leaves nothing to read or close: return NULL. */
+	if (stream) {
+		module = dumb_read_riff_quick(stream);
+		/* The stream is closed whatever the reader returned. */
+		dumbfile_close(stream);
+	}
+
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadriff2.c b/libraries/dumb/src/it/loadriff2.c
new file mode 100644
index 000000000..53466f1a5
--- /dev/null
+++ b/libraries/dumb/src/it/loadriff2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadriff2.c - Code to read a RIFF module file      / / \  \
+ *               opening and closing it for you,     | <  /   \_
+ *               and do an initial run-through.      |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+
+DUH *DUMBEXPORT dumb_load_riff(const char *filename)
+{
+	DUH *const module = dumb_load_riff_quick(filename);
+	dumb_it_do_initial_runthrough(module); /* unconditional, even for NULL */
+	return module;
+}
diff --git a/libraries/dumb/src/it/loads3m.c b/libraries/dumb/src/it/loads3m.c
new file mode 100644
index 000000000..09deb0f26
--- /dev/null
+++ b/libraries/dumb/src/it/loads3m.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loads3m.c - Code to read a ScreamTracker 3         / / \  \
+ *             file, opening and closing it for      | <  /   \_
+ *             you.                                  |  \/ /\   /
+ *                                                    \_  /  > /
+ * By entheh.                                           | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_s3m_quick(): opens the named file, reads it as an S3M module
+ * and closes it again. Returns NULL if the file cannot be opened, else
+ * whatever dumb_read_s3m_quick() gave; unload_duh() frees a loaded DUH.
+ */
+DUH *DUMBEXPORT dumb_load_s3m_quick(const char *filename)
+{
+	DUH *module = NULL;
+	DUMBFILE *stream = dumbfile_open(filename);
+
+	/* A failed open leaves nothing to read or close: return NULL. */
+	if (stream) {
+		module = dumb_read_s3m_quick(stream);
+		/* The stream is closed whatever the reader returned. */
+		dumbfile_close(stream);
+	}
+
+	return module;
+}
diff --git a/libraries/dumb/src/it/loads3m2.c b/libraries/dumb/src/it/loads3m2.c
new file mode 100644
index 000000000..7907775a8
--- /dev/null
+++ b/libraries/dumb/src/it/loads3m2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loads3m2.c - Function to read a ScreamTracker 3    / / \  \
+ *              file, opening and closing it for     | <  /   \_
+ *              you, and do an initial run-through.  |  \/ /\   /
+ *                                                    \_  /  > /
+ * Split off from loads3m.c by entheh.                  | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+
+DUH *DUMBEXPORT dumb_load_s3m(const char *filename)
+{
+	DUH *const module = dumb_load_s3m_quick(filename);
+	dumb_it_do_initial_runthrough(module); /* unconditional, even for NULL */
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadstm.c b/libraries/dumb/src/it/loadstm.c
new file mode 100644
index 000000000..2a533adb3
--- /dev/null
+++ b/libraries/dumb/src/it/loadstm.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadstm.c - Code to read a ScreamTracker 2         / / \  \
+ *             file, opening and closing it for      | <  /   \_
+ *             you.                                  |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_stm_quick(): opens the named file, reads it as an STM module
+ * and closes it again. Returns NULL if the file cannot be opened, else
+ * whatever dumb_read_stm_quick() gave; unload_duh() frees a loaded DUH.
+ */
+DUH *DUMBEXPORT dumb_load_stm_quick(const char *filename)
+{
+	DUH *module = NULL;
+	DUMBFILE *stream = dumbfile_open(filename);
+
+	/* A failed open leaves nothing to read or close: return NULL. */
+	if (stream) {
+		module = dumb_read_stm_quick(stream);
+		/* The stream is closed whatever the reader returned. */
+		dumbfile_close(stream);
+	}
+
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadstm2.c b/libraries/dumb/src/it/loadstm2.c
new file mode 100644
index 000000000..491542bf3
--- /dev/null
+++ b/libraries/dumb/src/it/loadstm2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadstm2.c - Function to read a ScreamTracker 2    / / \  \
+ *              file, opening and closing it for     | <  /   \_
+ *              you, and do an initial run-through.  |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+
+DUH *DUMBEXPORT dumb_load_stm(const char *filename)
+{
+	DUH *const module = dumb_load_stm_quick(filename);
+	dumb_it_do_initial_runthrough(module); /* unconditional, even for NULL */
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadxm.c b/libraries/dumb/src/it/loadxm.c
new file mode 100644
index 000000000..98ccd9301
--- /dev/null
+++ b/libraries/dumb/src/it/loadxm.c
@@ -0,0 +1,42 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadxm.c - Code to read a Fast Tracker II          / / \  \
+ *            file, opening and closing it for       | <  /   \_
+ *            you.                                   |  \/ /\   /
+ *                                                    \_  /  > /
+ * By entheh.                                           | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* dumb_load_xm_quick(): opens the named file, reads it as an XM module
+ * and closes it again. Returns NULL if the file cannot be opened, else
+ * whatever dumb_read_xm_quick() gave; unload_duh() frees a loaded DUH.
+ */
+DUH *DUMBEXPORT dumb_load_xm_quick(const char *filename)
+{
+	DUH *module = NULL;
+	DUMBFILE *stream = dumbfile_open(filename);
+
+	/* A failed open leaves nothing to read or close: return NULL. */
+	if (stream) {
+		module = dumb_read_xm_quick(stream);
+		/* The stream is closed whatever the reader returned. */
+		dumbfile_close(stream);
+	}
+
+	return module;
+}
diff --git a/libraries/dumb/src/it/loadxm2.c b/libraries/dumb/src/it/loadxm2.c
new file mode 100644
index 000000000..61459b5b8
--- /dev/null
+++ b/libraries/dumb/src/it/loadxm2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * loadxm2.c - Function to read a Fast Tracker II     / / \  \
+ *             file, opening and closing it for      | <  /   \_
+ *             you, and do an initial run-through.   |  \/ /\   /
+ *                                                    \_  /  > /
+ * Split off from loadxm.c by entheh.                   | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+
+DUH *DUMBEXPORT dumb_load_xm(const char *filename)
+{
+	DUH *const module = dumb_load_xm_quick(filename);
+	dumb_it_do_initial_runthrough(module); /* unconditional, even for NULL */
+	return module;
+}
diff --git a/libraries/dumb/src/it/ptmeffect.c b/libraries/dumb/src/it/ptmeffect.c
new file mode 100644
index 000000000..cbc2e90cf
--- /dev/null
+++ b/libraries/dumb/src/it/ptmeffect.c
@@ -0,0 +1,125 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * ptmeffect.c - Code for converting PTM              / / \  \
+ *               effects to IT effects.              | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Chris Moeller. Based on xmeffect.c              \_  /  > /
+ * by Julien Cugniere.                                  | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+/* Converts a PTM effect and its parameter into the equivalent IT effect and
+ * writes the result into entry.
+ *
+ * effect - PTM effect number; values of PTM_N_EFFECTS or more are ignored and
+ *          leave entry untouched.
+ * value  - effect parameter.
+ * entry  - pattern entry that receives effect, effectvalue and the
+ *          IT_ENTRY_EFFECT mask bit.
+ *
+ * Effects with no mapping below have IT_ENTRY_EFFECT cleared from the mask,
+ * although effect and effectvalue are still stored.
+ */
+void _dumb_it_ptm_convert_effect(int effect, int value, IT_ENTRY *entry)
+{
+	if (effect >= PTM_N_EFFECTS)
+		return;
+
+	/* The E effect is a family: HIGH(value) selects the member and LOW(value)
+	 * is its parameter. Give each member its own linear effect number. */
+	if (effect == PTM_E) {
+		effect = PTM_EBASE + HIGH(value);
+		value = LOW(value);
+	}
+
+	/* Assume the effect is convertible; the default case takes this back. */
+	entry->mask |= IT_ENTRY_EFFECT;
+
+	switch (effect) {
+
+		/* Effects that map one-to-one. */
+		case PTM_APPREGIO:
+			effect = IT_ARPEGGIO;
+			break;
+		case PTM_PORTAMENTO_UP:
+			effect = IT_PORTAMENTO_UP;
+			break;
+		case PTM_PORTAMENTO_DOWN:
+			effect = IT_PORTAMENTO_DOWN;
+			break;
+		case PTM_TONE_PORTAMENTO:
+			effect = IT_TONE_PORTAMENTO;
+			break;
+		case PTM_VIBRATO:
+			effect = IT_VIBRATO;
+			break;
+		case PTM_VOLSLIDE_TONEPORTA:
+			effect = IT_VOLSLIDE_TONEPORTA;
+			break;
+		case PTM_VOLSLIDE_VIBRATO:
+			effect = IT_VOLSLIDE_VIBRATO;
+			break;
+		case PTM_TREMOLO:
+			effect = IT_TREMOLO;
+			break;
+		case PTM_SAMPLE_OFFSET:
+			effect = IT_SET_SAMPLE_OFFSET;
+			break;
+		case PTM_VOLUME_SLIDE:
+			effect = IT_VOLUME_SLIDE;
+			break;
+		case PTM_POSITION_JUMP:
+			effect = IT_JUMP_TO_ORDER;
+			break;
+		case PTM_SET_CHANNEL_VOLUME:
+			effect = IT_SET_CHANNEL_VOLUME;
+			break;
+		case PTM_PATTERN_BREAK:
+			effect = IT_BREAK_TO_ROW;
+			break;
+		case PTM_SET_GLOBAL_VOLUME:
+			effect = IT_SET_GLOBAL_VOLUME;
+			break;
+		case PTM_RETRIGGER:
+			effect = IT_RETRIGGER_NOTE;
+			break;
+		case PTM_FINE_VIBRATO:
+			effect = IT_FINE_VIBRATO;
+			break;
+
+		/* TODO properly */
+		case PTM_NOTE_SLIDE_UP:
+			effect = IT_PTM_NOTE_SLIDE_UP;
+			break;
+		case PTM_NOTE_SLIDE_DOWN:
+			effect = IT_PTM_NOTE_SLIDE_DOWN;
+			break;
+		case PTM_NOTE_SLIDE_UP_RETRIG:
+			effect = IT_PTM_NOTE_SLIDE_UP_RETRIG;
+			break;
+		case PTM_NOTE_SLIDE_DOWN_RETRIG:
+			effect = IT_PTM_NOTE_SLIDE_DOWN_RETRIG;
+			break;
+
+		/* Parameters below 0x20 set the speed, the rest set the tempo. */
+		case PTM_SET_TEMPO_BPM:
+			if (value < 0x20)
+				effect = IT_SET_SPEED;
+			else
+				effect = IT_SET_SONG_TEMPO;
+			break;
+
+		/* E sub-effects that become IT S sub-effects (folded further down). */
+		case PTM_EBASE + PTM_E_SET_FINETUNE: /** TODO */
+			effect = SBASE + IT_S_FINETUNE;
+			break;
+		case PTM_EBASE + PTM_E_SET_LOOP:
+			effect = SBASE + IT_S_PATTERN_LOOP;
+			break;
+		case PTM_EBASE + PTM_E_NOTE_CUT:
+			effect = SBASE + IT_S_DELAYED_NOTE_CUT;
+			break;
+		case PTM_EBASE + PTM_E_NOTE_DELAY:
+			effect = SBASE + IT_S_NOTE_DELAY;
+			break;
+		case PTM_EBASE + PTM_E_PATTERN_DELAY:
+			effect = SBASE + IT_S_PATTERN_DELAY;
+			break;
+		case PTM_EBASE + PTM_E_SET_PANNING:
+			effect = SBASE + IT_S_SET_PAN;
+			break;
+
+		/* Fine slides reuse the ordinary IT effect with 0xF in one half of
+		 * the parameter. */
+		case PTM_EBASE + PTM_E_FINE_VOLSLIDE_UP:
+			value = EFFECT_VALUE(value, 0xF);
+			effect = IT_VOLUME_SLIDE;
+			break;
+		case PTM_EBASE + PTM_E_FINE_VOLSLIDE_DOWN:
+			value = EFFECT_VALUE(0xF, value);
+			effect = IT_VOLUME_SLIDE;
+			break;
+		case PTM_EBASE + PTM_E_FINE_PORTA_UP:
+			value = EFFECT_VALUE(0xF, value);
+			effect = IT_PORTAMENTO_UP;
+			break;
+		case PTM_EBASE + PTM_E_FINE_PORTA_DOWN:
+			value = EFFECT_VALUE(0xF, value);
+			effect = IT_PORTAMENTO_DOWN;
+			break;
+
+		case PTM_EBASE + PTM_E_RETRIG_NOTE:
+			value = EFFECT_VALUE(0, value);
+			effect = IT_XM_RETRIGGER_NOTE;
+			break;
+
+		/* Waveform controls: bit 2 of the parameter is dropped for now. */
+		case PTM_EBASE + PTM_E_SET_VIBRATO_CONTROL:
+			value &= ~4; /** TODO: value&4 -> don't retrig wave */
+			effect = SBASE + IT_S_SET_VIBRATO_WAVEFORM;
+			break;
+		case PTM_EBASE + PTM_E_SET_TREMOLO_CONTROL:
+			value &= ~4; /** TODO: value&4 -> don't retrig wave */
+			effect = SBASE + IT_S_SET_TREMOLO_WAVEFORM;
+			break;
+
+		default:
+			/* user effect (often used in demos for synchronisation) */
+			entry->mask &= ~IT_ENTRY_EFFECT;
+			break;
+	}
+
+	/* Undo the linearisation: anything in the SBASE range becomes IT_S with
+	 * the sub-effect number packed into the parameter. */
+	if (SBASE <= effect && effect < SBASE + 16) {
+		value = EFFECT_VALUE(effect - SBASE, value);
+		effect = IT_S;
+	}
+
+	entry->effectvalue = value;
+	entry->effect = effect;
+}
diff --git a/libraries/dumb/src/it/read669.c b/libraries/dumb/src/it/read669.c
new file mode 100644
index 000000000..53332b497
--- /dev/null
+++ b/libraries/dumb/src/it/read669.c
@@ -0,0 +1,448 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * read669.c - Code to read a 669 Composer module     / / \  \
+ *             from an open file.                    | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Chris Moeller.                                  \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* Reads one 669 pattern (64 rows x 8 channels x 3 bytes) from f and converts
+ * it into a freshly allocated array of IT entries stored in pattern->entry.
+ *
+ * pattern       - receives n_rows, n_entries and the entry array.
+ * f             - file to read the raw pattern from.
+ * tempo         - value for the IT_SET_SPEED command placed at the pattern start.
+ * breakpoint    - row before which an IT_BREAK_TO_ROW command is inserted;
+ *                 values of 63 or more insert nothing.
+ * buffer        - caller-supplied scratch space of at least 64 * 3 * 8 bytes.
+ * used_channels - raised to (channel + 1) for every channel that carries a
+ *                 note, volume or effect byte; never lowered.
+ *
+ * Returns 0 on success, -1 if the pattern could not be read in full or the
+ * entry array could not be allocated.
+ */
+static int it_669_read_pattern(IT_PATTERN *pattern, DUMBFILE *f, int tempo, int breakpoint, unsigned char *buffer, int * used_channels)
+{
+	int pos;
+	int channel;
+	int row;
+	IT_ENTRY *entry;
+
+	pattern->n_rows = 64;
+
+	/* Pull the whole raw pattern into memory; a short read is an error. */
+    if (dumbfile_getnc((char *)buffer, 64 * 3 * 8, f) < 64 * 3 * 8)
+		return -1;
+
+	/* compute number of entries */
+	pattern->n_entries = 64 + 1; /* Account for the row end markers, speed command */
+	if (breakpoint < 63) pattern->n_entries++; /* and break to row 0 */
+
+	/* First pass: count the cells that hold anything. A cell is empty only
+	 * when both its first and third byte are 0xFF. This test must match the
+	 * one used in the second pass below. */
+	pos = 0;
+	for (row = 0; row < 64; row++) {
+		for (channel = 0; channel < 8; channel++) {
+			if (buffer[pos+0] != 0xFF || buffer[pos+2] != 0xFF)
+				pattern->n_entries++;
+			pos += 3;
+		}
+	}
+
+	pattern->entry = malloc(pattern->n_entries * sizeof(*pattern->entry));
+	if (!pattern->entry)
+		return -1;
+
+	/* Move a breakpoint of 63 out of the row range so that the row loop never
+	 * matches it; no slot was reserved for a break in that case. */
+	if (breakpoint == 63) breakpoint++;
+
+	entry = pattern->entry;
+
+	/* The pattern opens with a speed command on channel 8, one past the eight
+	 * channels (0-7) that hold pattern data. */
+	entry->channel = 8;
+	entry->mask = IT_ENTRY_EFFECT;
+	entry->effect = IT_SET_SPEED;
+	entry->effectvalue = tempo;
+	entry++;
+
+	/* Second pass: convert every non-empty cell. */
+	pos = 0;
+	for (row = 0; row < 64; row++) {
+
+		/* Emit the break before the cells of the breakpoint row. */
+		if (row == breakpoint) {
+			entry->channel = 8;
+			entry->mask = IT_ENTRY_EFFECT;
+			entry->effect = IT_BREAK_TO_ROW;
+			entry->effectvalue = 0;
+			entry++;
+		}
+
+		for (channel = 0; channel < 8; channel++) {
+			if (buffer[pos+0] != 0xFF || buffer[pos+2] != 0xFF) {
+				entry->channel = channel;
+				entry->mask = 0;
+
+				/* First byte below 0xFE: note and instrument are present.
+				 * The note is the top six bits; the instrument is the low
+				 * two bits joined with the high nibble of the second byte. */
+				if (buffer[pos+0] < 0xFE) {
+					entry->mask |= IT_ENTRY_NOTE | IT_ENTRY_INSTRUMENT;
+					entry->note = (buffer[pos+0] >> 2) + 36;
+					entry->instrument = (((buffer[pos+0] << 4) | (buffer[pos+1] >> 4)) & 0x3F) + 1;
+				}
+				/* First byte of 0xFE or below: a volume is present (0xFE
+				 * itself means volume only). The 0-15 value in the low
+				 * nibble of the second byte is rescaled to 0-64. */
+				if (buffer[pos+0] <= 0xFE) {
+					entry->mask |= IT_ENTRY_VOLPAN;
+					entry->volpan = ((buffer[pos+1] & 15) << 6) / 15;
+					if (*used_channels < channel + 1) *used_channels = channel + 1;
+				}
+				/* Third byte: effect number in the high nibble, parameter
+				 * in the low nibble. */
+				if (buffer[pos+2] != 0xFF) {
+					entry->mask |= IT_ENTRY_EFFECT;
+					entry->effectvalue = buffer[pos+2] & 15;
+					switch (buffer[pos+2] >> 4) {
+						case 0:
+							entry->effect = IT_PORTAMENTO_UP;
+							break;
+						case 1:
+							entry->effect = IT_PORTAMENTO_DOWN;
+							break;
+						case 2:
+							entry->effect = IT_TONE_PORTAMENTO;
+							break;
+						case 3:
+							/* Expressed as an IT S sub-effect: finetune,
+							 * with 8 added to the parameter. */
+							entry->effect = IT_S;
+							entry->effectvalue += IT_S_FINETUNE * 16 + 8;
+							break;
+						case 4:
+							entry->effect = IT_VIBRATO;
+							// XXX speed unknown
+							entry->effectvalue |= 0x10;
+							break;
+						case 5:
+							/* A zero parameter is dropped rather than
+							 * converted to a speed command. */
+							if (entry->effectvalue) {
+								entry->effect = IT_SET_SPEED;
+							} else {
+								entry->mask &= ~IT_ENTRY_EFFECT;
+							}
+							break;
+#if 0
+						/* dunno about this, really... */
+						case 6:
+							if (entry->effectvalue == 0) {
+								entry->effect = IT_PANNING_SLIDE;
+								entry->effectvalue = 0xFE;
+							} else if (entry->effectvalue == 1) {
+								entry->effect = IT_PANNING_SLIDE;
+								entry->effectvalue = 0xEF;
+							} else {
+								entry->mask &= ~IT_ENTRY_EFFECT;
+							}
+							break;
+#endif
+						default:
+							/* Any other effect number is ignored. */
+							entry->mask &= ~IT_ENTRY_EFFECT;
+							break;
+					}
+					if (*used_channels < channel + 1) *used_channels = channel + 1;
+				}
+
+				/* The slot is consumed even if its mask ended up empty,
+				 * because the counting pass reserved one for it. */
+				entry++;
+			}
+			pos += 3;
+		}
+		/* Every row is closed by an end-of-row marker. */
+		IT_SET_END_ROW(entry);
+		entry++;
+	}
+
+	return 0;
+}
+
+
+
+/* Reads one 669 sample header from f into sample: a 13-byte name followed by
+ * the length, loop start and loop end (each read with dumbfile_igetl).
+ *
+ * A header whose length is zero or negative describes no sample: flags is set
+ * to 0 and the remaining fields are left untouched. Otherwise the sample is
+ * marked IT_SAMPLE_EXISTS, given fixed default volume/pan/speed settings, and
+ * flagged IT_SAMPLE_LOOP when the loop points describe a usable loop.
+ *
+ * Returns 0 on success, -1 if the file reported an error.
+ */
+static int it_669_read_sample_header(IT_SAMPLE *sample, DUMBFILE *f)
+{
+	dumbfile_getnc((char *)sample->name, 13, f);
+	sample->name[13] = 0;
+
+	sample->filename[0] = 0;
+
+	sample->length = dumbfile_igetl(f);
+	sample->loop_start = dumbfile_igetl(f);
+	sample->loop_end = dumbfile_igetl(f);
+
+	/* A single check covers all the reads above. */
+	if (dumbfile_error(f))
+		return -1;
+
+	if (sample->length <= 0) {
+		sample->flags = 0;
+		return 0;
+	}
+
+	sample->flags = IT_SAMPLE_EXISTS;
+
+	sample->global_volume = 64;
+	sample->default_volume = 64;
+
+	sample->default_pan = 0;
+	sample->C5_speed = 8363;
+	// the above line might be wrong
+
+	/* A loop end beyond the sample with a loop start of zero is treated as
+	 * "no loop"; any other overlong loop end is clamped to the sample. */
+	if ((sample->loop_end > sample->length) && !(sample->loop_start))
+		sample->loop_end = 0;
+
+	if (sample->loop_end > sample->length)
+		sample->loop_end = sample->length;
+
+	/* Enable looping only for a loop longer than two frames. The loop points
+	 * come straight from the file, so a negative start is rejected, and the
+	 * ordering is checked before subtracting so that the difference cannot
+	 * overflow. */
+	if (sample->loop_start >= 0 &&
+		sample->loop_end > sample->loop_start &&
+		sample->loop_end - sample->loop_start > 2)
+		sample->flags |= IT_SAMPLE_LOOP;
+
+	sample->vibrato_speed = 0;
+	sample->vibrato_depth = 0;
+	sample->vibrato_rate = 0;
+	sample->vibrato_waveform = 0; // do we have to set _all_ these?
+	sample->finetune = 0;
+	sample->max_resampling_quality = -1;
+
+	return 0;
+}
+
+
+
+/* Reads the data for one 669 sample from f into a newly allocated buffer.
+ *
+ * For a looped sample, anything stored after the loop end is not kept: the
+ * sample length is cut to the loop end and the surplus bytes are skipped in
+ * the file. A file that ends early is tolerated: the sample is shortened to
+ * what was read, or disabled (flags = 0) if nothing was read.
+ *
+ * A sample whose length is zero or negative has no data. It is disabled and
+ * left with a NULL data pointer instead of being passed to malloc(), which
+ * may return NULL for a zero-size request and would be handed a huge size
+ * for a negative one.
+ *
+ * Returns 0 on success, -1 on allocation failure or a file error while
+ * skipping the truncated part.
+ */
+static int it_669_read_sample_data(IT_SAMPLE *sample, DUMBFILE *f)
+{
+	int32 i;
+	int32 truncated_size;
+
+	/* let's get rid of the sample data coming after the end of the loop */
+	if ((sample->flags & IT_SAMPLE_LOOP) && sample->loop_end < sample->length) {
+		truncated_size = sample->length - sample->loop_end;
+		sample->length = sample->loop_end;
+	} else {
+		truncated_size = 0;
+	}
+
+	/* Nothing to read: absent sample, or a loop end that left no data. */
+	if (sample->length <= 0) {
+		sample->flags = 0;
+		sample->data = NULL;
+		return 0;
+	}
+
+	sample->data = malloc(sample->length);
+
+	if (!sample->data)
+		return -1;
+
+	i = dumbfile_getnc(sample->data, sample->length, f);
+
+	if (i < sample->length) {
+		/* Truncated file: keep whatever was read rather than failing. */
+		if (i <= 0) {
+			sample->flags = 0;
+			return 0;
+		}
+		sample->length = i;
+		if (sample->loop_end > i) sample->loop_end = i;
+	} else {
+		/* skip truncated data */
+		dumbfile_skip(f, truncated_size);
+		// Should we be truncating it?
+		if (dumbfile_error(f))
+			return -1;
+	}
+
+	/* Flip the top bit of every byte (presumably unsigned-to-signed
+	 * conversion of the stored samples -- confirm against the format). */
+	for (i = 0; i < sample->length; i++)
+		((signed char *)sample->data)[i] ^= 0x80;
+
+	return 0;
+}
+
+
+/* Parses a whole 669 module from f into a newly allocated DUMB_IT_SIGDATA.
+ *
+ * f   - file positioned at the start of the module.
+ * ext - set to nonzero when the signature word is 0x4E4A and to zero when it
+ *       is 0x6669; not written if the signature is neither.
+ *
+ * Returns the new sigdata, or NULL on an unknown signature, a short read, an
+ * out-of-range count, an invalid order list or an allocation failure.
+ */
+static DUMB_IT_SIGDATA *it_669_load_sigdata(DUMBFILE *f, int * ext)
+{
+	DUMB_IT_SIGDATA *sigdata;
+	unsigned char tempolist[128];
+	unsigned char breaklist[128];
+	unsigned char *pattern_buffer;
+	int n_channels = 0;
+	int signature;
+	int n;
+
+	signature = dumbfile_igetw(f);
+	if (signature != 0x6669 && signature != 0x4E4A)
+		return NULL;
+
+	*ext = (signature == 0x4E4A);
+
+	sigdata = malloc(sizeof(*sigdata));
+	if (!sigdata)
+		return NULL;
+
+	/* Until the pointer members are initialised, a failure may only free the
+	 * structure itself. */
+	if (dumbfile_getnc((char *)sigdata->name, 36, f) < 36)
+		goto fail_struct_only;
+	sigdata->name[36] = 0;
+
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+	sigdata->sample = NULL;
+
+	sigdata->n_instruments = 0;
+
+	/* The song message is two 36-byte lines joined by CR LF, plus a NUL. */
+	sigdata->song_message = malloc(72 + 2 + 1);
+	if (!sigdata->song_message)
+		goto fail_struct_only;
+
+	if (dumbfile_getnc((char *)sigdata->song_message, 36, f) < 36)
+		goto fail;
+	sigdata->song_message[36] = 13;
+	sigdata->song_message[36 + 1] = 10;
+	if (dumbfile_getnc((char *)sigdata->song_message + 38, 36, f) < 36)
+		goto fail;
+	sigdata->song_message[38 + 36] = 0;
+
+	sigdata->n_samples = dumbfile_getc(f);
+	sigdata->n_patterns = dumbfile_getc(f);
+	sigdata->restart_position = dumbfile_getc(f);
+
+	if ((sigdata->n_samples) > 64 || (sigdata->n_patterns > 128))
+		goto fail;
+
+	/* We may need to scan the extra ones! */
+	sigdata->order = malloc(128);
+	if (!sigdata->order)
+		goto fail;
+	if (dumbfile_getnc((char *)sigdata->order, 128, f) < 128)
+		goto fail;
+
+	/* The order list runs up to the first 255; every order before that must
+	 * name an existing pattern, and there must be at least one. */
+	for (n = 0; n < 128; n++) {
+		if (sigdata->order[n] == 255)
+			break;
+		if (sigdata->order[n] >= sigdata->n_patterns)
+			goto fail;
+	}
+	if (n == 0)
+		goto fail;
+	sigdata->n_orders = n;
+
+	/* Per-pattern tempo and break tables. */
+	if (dumbfile_getnc((char *)tempolist, 128, f) < 128)
+		goto fail;
+	if (dumbfile_getnc((char *)breaklist, 128, f) < 128)
+		goto fail;
+
+	sigdata->sample = malloc(sigdata->n_samples * sizeof(*sigdata->sample));
+	if (!sigdata->sample)
+		goto fail;
+
+	/* Clear every data pointer before any header is read, so that cleanup
+	 * never sees an uninitialised pointer. */
+	for (n = 0; n < sigdata->n_samples; n++)
+		sigdata->sample[n].data = NULL;
+
+	for (n = 0; n < sigdata->n_samples; n++) {
+		if (it_669_read_sample_header(&sigdata->sample[n], f))
+			goto fail;
+	}
+
+	/* May as well try to save a tiny bit of memory. */
+	if (sigdata->n_orders < 128) {
+		unsigned char *shrunk = realloc(sigdata->order, sigdata->n_orders);
+		if (shrunk)
+			sigdata->order = shrunk;
+	}
+
+	sigdata->pattern = malloc(sigdata->n_patterns * sizeof(*sigdata->pattern));
+	if (!sigdata->pattern)
+		goto fail;
+	for (n = 0; n < sigdata->n_patterns; n++)
+		sigdata->pattern[n].entry = NULL;
+
+	/* Read in the patterns, sharing one scratch buffer between them:
+	 * 64 rows * 3 bytes * 8 channels. */
+	pattern_buffer = malloc(64 * 3 * 8);
+	if (!pattern_buffer)
+		goto fail;
+	for (n = 0; n < sigdata->n_patterns; n++) {
+		if (it_669_read_pattern(&sigdata->pattern[n], f, tempolist[n], breaklist[n], pattern_buffer, &n_channels) != 0) {
+			free(pattern_buffer);
+			goto fail;
+		}
+	}
+	free(pattern_buffer);
+
+	sigdata->n_pchannels = n_channels;
+
+	/* And finally, the sample data */
+	for (n = 0; n < sigdata->n_samples; n++) {
+		if (it_669_read_sample_data(&sigdata->sample[n], f))
+			goto fail;
+	}
+
+	/* Now let's initialise the remaining variables, and we're done! */
+	sigdata->flags = IT_OLD_EFFECTS | IT_LINEAR_SLIDES | IT_STEREO | IT_WAS_A_669;
+
+	sigdata->global_volume = 128;
+	sigdata->mixing_volume = 48;
+	sigdata->speed = 4;
+	sigdata->tempo = 78;
+	sigdata->pan_separation = 128;
+
+	memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS);
+
+	/* Channels alternate between the two sides of the centre (32). */
+	for (n = 0; n < DUMB_IT_N_CHANNELS; n += 2) {
+		int sep = 32 * dumb_it_default_panning_separation / 100;
+		sigdata->channel_pan[n+0] = 32 + sep;
+		sigdata->channel_pan[n+1] = 32 - sep;
+	}
+
+	_dumb_it_fix_invalid_orders(sigdata);
+
+	return sigdata;
+
+fail:
+	_dumb_it_unload_sigdata(sigdata);
+	return NULL;
+
+fail_struct_only:
+	free(sigdata);
+	return NULL;
+}
+
+
+
+/* Reads a 669 module from f and wraps it in a DUH carrying two tags: TITLE
+ * (the module name) and FORMAT ("669" or "669 Extended"). Returns NULL if the
+ * module could not be loaded; otherwise returns whatever make_duh() returns.
+ */
+DUH *DUMBEXPORT dumb_read_669_quick(DUMBFILE *f)
+{
+	DUH_SIGTYPE_DESC *descptr = &_dumb_sigtype_it;
+	const char *tag[2][2];
+	sigdata_t *sigdata;
+	int ext;
+
+	sigdata = it_669_load_sigdata(f, &ext);
+	if (sigdata == NULL)
+		return NULL;
+
+	tag[0][0] = "TITLE";
+	tag[0][1] = (const char *)(((DUMB_IT_SIGDATA *)sigdata)->name);
+
+	tag[1][0] = "FORMAT";
+	if (ext)
+		tag[1][1] = "669 Extended";
+	else
+		tag[1][1] = "669";
+
+	return make_duh(-1, 2, (const char *const (*)[2])tag, 1, &descptr, &sigdata);
+}
diff --git a/libraries/dumb/src/it/read6692.c b/libraries/dumb/src/it/read6692.c
new file mode 100644
index 000000000..a9911d3ec
--- /dev/null
+++ b/libraries/dumb/src/it/read6692.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * read6692.c - Code to read a 669 Composer module    / / \  \
+ *              from an open file, and do an initial | <  /   \_
+ *              run-through.                         |  \/ /\   /
+ * By Chris Moeller.                                  \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+
+/* Reads a 669 module from f with dumb_read_669_quick(), hands the result to
+ * dumb_it_do_initial_runthrough(), and returns it to the caller.
+ */
+DUH *DUMBEXPORT dumb_read_669(DUMBFILE *f)
+{
+	DUH *module;
+
+	module = dumb_read_669_quick(f);
+
+	dumb_it_do_initial_runthrough(module);
+
+	return module;
+}
diff --git a/libraries/dumb/src/it/readam.c b/libraries/dumb/src/it/readam.c
new file mode 100644
index 000000000..be99f1934
--- /dev/null
+++ b/libraries/dumb/src/it/readam.c
@@ -0,0 +1,788 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readam.c - Code to read a RIFF AM module           / / \  \
+ *             from a parsed RIFF structure.         | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Chris Moeller.                                  \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+#include "internal/riff.h"
+
+/* Reads one RIFF AM sample (header and data) from the current position of f
+ * into sample.
+ *
+ * sample - receives the name, settings and a newly allocated data buffer.
+ * f      - file positioned at the start of the sample header.
+ * len    - number of bytes available for header and data together.
+ * ver    - 0 selects the fixed 0x38-byte header; any other value selects the
+ *          header that starts with its own length.
+ *
+ * A sample of length zero is marked as not existing and is not an error.
+ *
+ * Returns 0 on success. Returns -1 if the header does not fit in len, uses
+ * unknown flag bits, declares a negative length or more data than len
+ * allows, or if allocation or seeking fails.
+ */
+static int it_riff_am_process_sample( IT_SAMPLE * sample, DUMBFILE * f, int len, int ver )
+{
+	int header_length;
+	int default_pan;
+	int default_volume;
+	int flags;
+	int length;
+	int length_bytes;
+	int loop_start;
+	int loop_end;
+	int sample_rate;
+	int32 got;
+
+	int32 start = dumbfile_pos( f );
+
+	if ( ver == 0 )
+	{
+		if ( len < 0x38 )
+			return -1;
+
+		header_length = 0x38;
+
+		dumbfile_getnc( (char *) sample->name, 28, f );
+		sample->name[ 28 ] = 0;
+
+		default_pan = dumbfile_getc( f );
+		default_volume = dumbfile_getc( f );
+		flags = dumbfile_igetw( f );
+		length = dumbfile_igetl( f );
+		loop_start = dumbfile_igetl( f );
+		loop_end = dumbfile_igetl( f );
+		sample_rate = dumbfile_igetl( f );
+	}
+	else
+	{
+		if (len < 4) return -1;
+
+		header_length = dumbfile_igetl( f );
+		if ( header_length < 0x40 )
+			return -1;
+		if ( header_length + 4 > len )
+			return -1;
+
+		/* The length field itself is not part of the header it describes. */
+		start += 4;
+		len -= 4;
+
+		dumbfile_getnc( (char *) sample->name, 32, f );
+		/* Terminate the name inside the 32 bytes just read. The size of
+		 * sample->name is not visible here, so index 32 is not assumed to
+		 * exist; a full 32-character name loses its last character. */
+		sample->name[ 31 ] = 0;
+
+		default_pan = dumbfile_igetw( f );
+		default_volume = dumbfile_igetw( f );
+		flags = dumbfile_igetw( f );
+		dumbfile_skip( f, 2 );
+		length = dumbfile_igetl( f );
+		loop_start = dumbfile_igetl( f );
+		loop_end = dumbfile_igetl( f );
+		sample_rate = dumbfile_igetl( f );
+
+		if ( default_pan > 0x7FFF || default_volume > 0x7FFF )
+			return -1;
+
+		/* Rescale from 0-32767 to 0-64. */
+		default_pan = default_pan * 64 / 32767;
+		default_volume = default_volume * 64 / 32767;
+	}
+
+	if ( ! length ) {
+		sample->flags &= ~IT_SAMPLE_EXISTS;
+		return 0;
+	}
+
+	/* Only these flag bits are understood. */
+	if ( flags & ~( 0x8000 | 0x80 | 0x20 | 0x10 | 0x08 | 0x04 ) )
+		return -1;
+
+	/* Reject a negative length, and one that could not fit in len even at
+	 * one byte per frame. This also keeps the shift below from overflowing. */
+	if ( length < 0 || length > len )
+		return -1;
+
+	/* Flag 0x04 marks 16-bit data: two bytes per frame. */
+	length_bytes = length << ( ( flags & 0x04 ) >> 2 );
+
+	if ( length_bytes + header_length > len )
+		return -1;
+
+	sample->flags = 0;
+
+	if ( flags & 0x80 ) sample->flags |= IT_SAMPLE_EXISTS;
+	if ( flags & 0x04 ) sample->flags |= IT_SAMPLE_16BIT;
+
+	sample->length = length;
+	sample->loop_start = loop_start;
+	sample->loop_end = loop_end;
+	sample->C5_speed = sample_rate;
+	sample->default_volume = default_volume;
+	sample->default_pan = default_pan | ( ( flags & 0x20 ) << 2 );
+	sample->filename[0] = 0;
+	sample->global_volume = 64;
+	sample->vibrato_speed = 0;
+	sample->vibrato_depth = 0;
+	sample->vibrato_rate = 0;
+	sample->vibrato_waveform = IT_VIBRATO_SINE;
+	sample->finetune = 0;
+	sample->max_resampling_quality = -1;
+
+	/* Flag 0x08 requests a loop. It is honoured only if the loop lies inside
+	 * the sample, and the sample is then cut off at the loop end. */
+	if ( flags & 0x08 )
+	{
+		if (((unsigned int)sample->loop_end <= (unsigned int)sample->length) &&
+			((unsigned int)sample->loop_start < (unsigned int)sample->loop_end))
+		{
+			sample->length = sample->loop_end;
+			sample->flags |= IT_SAMPLE_LOOP;
+			if ( flags & 0x10 ) sample->flags |= IT_SAMPLE_PINGPONG_LOOP;
+		}
+	}
+
+	/* Recompute the byte count, since the loop may have shortened the sample. */
+	length_bytes = sample->length << ( ( flags & 0x04 ) >> 2 );
+
+	sample->data = malloc( length_bytes );
+	if ( ! sample->data )
+		return -1;
+
+	if ( dumbfile_seek( f, start + header_length, DFS_SEEK_SET ) )
+		return -1;
+
+	/* A short read is tolerated, but the part that was not read is cleared
+	 * so that no uninitialised memory ends up in the sample. */
+	got = dumbfile_getnc( sample->data, length_bytes, f );
+	if ( got < 0 )
+		got = 0;
+	if ( got < length_bytes )
+		memset( (char *) sample->data + got, 0, length_bytes - got );
+
+	return 0;
+}
+
+/* Reads one packed RIFF AM pattern from the current position of f and
+ * converts it into an array of IT entries stored in pattern->entry.
+ *
+ * pattern - receives n_rows, n_entries and, if the pattern holds any data,
+ *           a newly allocated entry array.
+ * f       - file positioned at the row-count byte of the pattern.
+ * len     - size of the pattern data in bytes, including the row-count byte.
+ * ver     - 0 stores volume bytes unchanged; any other value rescales them
+ *           by 64 / 127.
+ *
+ * The data is parsed twice: once to count entries so the array can be sized,
+ * then again to fill it. Both passes must consume exactly the same bytes.
+ *
+ * Returns 0 on success, including the case of a pattern with no entries,
+ * where pattern->entry is not assigned. Returns -1 if allocation fails or
+ * the filled pattern ends up with no entries.
+ */
+static int it_riff_am_process_pattern( IT_PATTERN * pattern, DUMBFILE * f, int len, int ver )
+{
+    int nrows, row;
+    long start, end;
+	unsigned flags;
+    int p, q, r;
+	IT_ENTRY * entry;
+
+    /* The first byte holds the row count minus one. */
+    nrows = dumbfile_getc( f ) + 1;
+
+	pattern->n_rows = nrows;
+
+	/* The row-count byte has been consumed. */
+	len -= 1;
+
+	pattern->n_entries = 0;
+
+	row = 0;
+
+    start = dumbfile_pos( f );
+    end = start + len;
+
+    /* Pass 1: count the entries without storing anything. */
+    while ( (row < nrows) && !dumbfile_error( f ) && (dumbfile_pos( f ) < end) ) {
+        p = dumbfile_getc( f );
+        /* A zero byte ends the current row. */
+        if ( ! p ) {
+			++ row;
+			continue;
+		}
+
+        /* The top three bits say which fields follow this byte. */
+        flags = p & 0xE0;
+
+        if (flags) {
+			++ pattern->n_entries;
+            if (flags & 0x80) dumbfile_skip( f, 2 ); /* effect: 2 bytes */
+            if (flags & 0x40) dumbfile_skip( f, 2 ); /* instrument and note: 2 bytes */
+            if (flags & 0x20) dumbfile_skip( f, 1 ); /* volume: 1 byte */
+		}
+	}
+
+	/* An empty pattern needs no entry array. */
+	if ( ! pattern->n_entries ) return 0;
+
+	/* Reserve one end-of-row marker per row on top of the counted entries. */
+	pattern->n_entries += nrows;
+
+	pattern->entry = malloc( pattern->n_entries * sizeof( * pattern->entry ) );
+	if ( ! pattern->entry ) return -1;
+
+	entry = pattern->entry;
+
+	row = 0;
+
+    /* Rewind to the start of the packed data for the second pass. */
+    dumbfile_seek( f, start, DFS_SEEK_SET );
+
+    /* Pass 2: decode the same bytes into entries. */
+    while ( ( row < nrows ) && !dumbfile_error( f ) && ( dumbfile_pos( f ) < end ) )
+	{
+        p = dumbfile_getc( f );
+
+        if ( ! p )
+		{
+			IT_SET_END_ROW( entry );
+			++ entry;
+			++ row;
+			continue;
+		}
+
+        /* The low five bits are the channel number. */
+        flags = p;
+		entry->channel = flags & 0x1F;
+		entry->mask = 0;
+
+		if (flags & 0xE0)
+		{
+			if ( flags & 0x80 )
+			{
+                /* First byte read is passed as the value, second as the effect. */
+                q = dumbfile_getc( f );
+                r = dumbfile_getc( f );
+                _dumb_it_xm_convert_effect( r, q, entry, 0 );
+			}
+
+			if ( flags & 0x40 )
+			{
+                /* Instrument, then note; zero means "not present" for each. */
+                q = dumbfile_getc( f );
+                r = dumbfile_getc( f );
+                if ( q )
+				{
+					entry->mask |= IT_ENTRY_INSTRUMENT;
+                    entry->instrument = q;
+				}
+                if ( r )
+				{
+					entry->mask |= IT_ENTRY_NOTE;
+                    entry->note = r - 1;
+				}
+			}
+
+			if ( flags & 0x20 )
+			{
+                q = dumbfile_getc( f );
+				entry->mask |= IT_ENTRY_VOLPAN;
+                if ( ver == 0 ) entry->volpan = q;
+                else entry->volpan = q * 64 / 127;
+			}
+
+			/* Keep the slot only if something was stored in it; otherwise
+			 * the next entry overwrites it. */
+			if (entry->mask) entry++;
+		}
+	}
+
+	/* If the data ran out early, close the remaining rows. */
+	while ( row < nrows )
+	{
+		IT_SET_END_ROW( entry );
+		++ entry;
+		++ row;
+	}
+
+	/* Record how many slots were really used, which may be fewer than reserved. */
+	pattern->n_entries = (int)(entry - pattern->entry);
+	if ( ! pattern->n_entries ) return -1;
+
+	return 0;
+}
+
+/* Builds a DUMB_IT_SIGDATA from a parsed RIFF stream of type 'AMFF'.
+ *
+ * f      - file the stream was parsed from; chunk contents are read from it.
+ * stream - parsed RIFF structure listing the chunks.
+ *
+ * The chunk list is walked three times:
+ *   1. validate chunk sizes and find the highest pattern and sample index;
+ *   2. read the MAIN chunk (name, flags, channel count, speed, tempo,
+ *      global volume, per-channel panning);
+ *   3. read the ORDR, PATT and INST chunks into the allocated arrays.
+ *
+ * Returns the new sigdata, or NULL if f or stream is NULL, the stream is not
+ * 'AMFF', a required chunk is missing or malformed, a count is out of range,
+ * or an allocation or seek fails.
+ */
+static DUMB_IT_SIGDATA *it_riff_amff_load_sigdata( DUMBFILE * f, struct riff * stream )
+{
+	DUMB_IT_SIGDATA *sigdata;
+
+	int n, o, p, found;
+
+	if ( ! f || ! stream ) goto error;
+
+	if ( stream->type != DUMB_ID( 'A', 'M', 'F', 'F' ) ) goto error;
+
+	sigdata = malloc( sizeof( *sigdata ) );
+	if ( ! sigdata ) goto error;
+
+	sigdata->n_patterns = 0;
+	sigdata->n_samples = 0;
+	sigdata->name[0] = 0;
+
+	/* Bit 0: MAIN seen. Bit 1: ORDR seen. Each may appear only once. */
+	found = 0;
+
+	/* Pass 1: validation and counting. Nothing is allocated yet, so errors
+	 * only need to free the structure itself (error_sd). */
+	for ( n = 0; (unsigned)n < stream->chunk_count; ++n )
+	{
+		struct riff_chunk * c = stream->chunks + n;
+		switch( c->type )
+		{
+		case DUMB_ID( 'M', 'A', 'I', 'N' ):
+			/* initialization data */
+			if ( ( found & 1 ) || ( c->size < 0x48 ) ) goto error_sd;
+			found |= 1;
+			break;
+
+		case DUMB_ID( 'O', 'R', 'D', 'R' ):
+			if ( ( found & 2 ) || ( c->size < 1 ) ) goto error_sd;
+			found |= 2;
+			break;
+
+		case DUMB_ID( 'P', 'A', 'T', 'T' ):
+			/* One index byte, then the pattern data size. */
+			if ( dumbfile_seek( f, c->offset, DFS_SEEK_SET ) ) goto error_sd;
+			o = dumbfile_getc( f );
+			if ( o >= sigdata->n_patterns ) sigdata->n_patterns = o + 1;
+			o = dumbfile_igetl( f );
+			if ( (unsigned)o + 5 > c->size ) goto error_sd;
+			break;
+
+		case DUMB_ID( 'I', 'N', 'S', 'T' ):
+			{
+				if ( c->size < 0xE1 ) goto error_sd;
+				/* The sample index is the second byte of the chunk. */
+				if ( dumbfile_seek( f, c->offset + 1, DFS_SEEK_SET ) ) goto error_sd;
+				o = dumbfile_getc( f );
+				if ( o >= sigdata->n_samples ) sigdata->n_samples = o + 1;
+				/* An optional SAMP block may follow the 0xE1-byte header. */
+				if ( c->size >= 0x121 )
+				{
+					if ( dumbfile_seek( f, c->offset + 0xE1, DFS_SEEK_SET ) ) goto error_sd;
+					if ( dumbfile_mgetl( f ) == DUMB_ID('S','A','M','P') )
+					{
+						unsigned size = dumbfile_igetl( f );
+						if ( size + 0xE1 + 8 > c->size ) goto error_sd;
+					}
+				}
+			}
+			break;
+		}
+	}
+
+	if ( found != 3 || !sigdata->n_samples || !sigdata->n_patterns ) goto error_sd;
+
+	if ( sigdata->n_samples > 255 || sigdata->n_patterns > 255 ) goto error_sd;
+
+	/* From here on the pointer members are valid (NULL or allocated), so
+	 * errors go through _dumb_it_unload_sigdata() (error_usd). */
+	sigdata->song_message = NULL;
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->sample = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->mixing_volume = 48;
+	sigdata->pan_separation = 128;
+
+	sigdata->n_instruments = 0;
+	sigdata->n_orders = 0;
+	sigdata->restart_position = 0;
+
+	memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS);
+
+	/* Default panning in groups of four; MAIN overrides the channels it lists. */
+	for (n = 0; n < DUMB_IT_N_CHANNELS; n += 4) {
+		int sep = 32 * dumb_it_default_panning_separation / 100;
+		sigdata->channel_pan[n  ] = 32 - sep;
+		sigdata->channel_pan[n+1] = 32 + sep;
+		sigdata->channel_pan[n+2] = 32 + sep;
+		sigdata->channel_pan[n+3] = 32 - sep;
+	}
+
+	/* Pass 2: the MAIN chunk. */
+	for ( n = 0; (unsigned)n < stream->chunk_count; ++n )
+	{
+		struct riff_chunk * c = stream->chunks + n;
+		switch ( c->type )
+		{
+		case DUMB_ID( 'M', 'A', 'I', 'N' ):
+			if ( dumbfile_seek( f, c->offset, DFS_SEEK_SET ) ) goto error_usd;
+			dumbfile_getnc( (char *) sigdata->name, 64, f );
+			sigdata->name[ 64 ] = 0;
+			sigdata->flags = IT_STEREO | IT_OLD_EFFECTS | IT_COMPATIBLE_GXX | IT_WAS_AN_S3M;
+			o = dumbfile_getc( f );
+			if ( ! ( o & 1 ) ) sigdata->flags |= IT_LINEAR_SLIDES;
+			if ( ( o & ~3 ) || ! ( o & 2 ) ) goto error_usd; // unknown flags
+			sigdata->n_pchannels = dumbfile_getc( f );
+			sigdata->speed = dumbfile_getc( f );
+			sigdata->tempo = dumbfile_getc( f );
+
+			dumbfile_skip( f, 4 );
+
+			sigdata->global_volume = dumbfile_getc( f );
+
+			/* The channel count comes from the file and indexes the
+			 * per-channel arrays below, which this function treats as
+			 * DUMB_IT_N_CHANNELS long. Reject anything outside that range
+			 * (including the negative value of a failed read) before it
+			 * can write past them. */
+			if ( sigdata->n_pchannels < 0 || sigdata->n_pchannels > DUMB_IT_N_CHANNELS ) goto error_usd;
+
+			if ( c->size < 0x48 + (unsigned)sigdata->n_pchannels ) goto error_usd;
+
+			for ( o = 0; o < sigdata->n_pchannels; ++o )
+			{
+				p = dumbfile_getc( f );
+				sigdata->channel_pan[ o ] = p;
+				/* A pan of 128 or more silences the channel. */
+				if ( p >= 128 )
+				{
+					sigdata->channel_volume[ o ] = 0;
+				}
+			}
+			break;
+		}
+	}
+
+	sigdata->pattern = malloc( sigdata->n_patterns * sizeof( *sigdata->pattern ) );
+	if ( ! sigdata->pattern ) goto error_usd;
+	for ( n = 0; n < sigdata->n_patterns; ++n )
+		sigdata->pattern[ n ].entry = NULL;
+
+	sigdata->sample = malloc( sigdata->n_samples * sizeof( *sigdata->sample ) );
+	if ( ! sigdata->sample ) goto error_usd;
+	for ( n = 0; n < sigdata->n_samples; ++n )
+	{
+		IT_SAMPLE * sample = sigdata->sample + n;
+		sample->data = NULL;
+		sample->flags = 0;
+		sample->name[ 0 ] = 0;
+	}
+
+	/* Pass 3: orders, patterns and samples. */
+	for ( n = 0; (unsigned)n < stream->chunk_count; ++n )
+	{
+		struct riff_chunk * c = stream->chunks + n;
+		switch ( c->type )
+		{
+		case DUMB_ID( 'O', 'R', 'D', 'R' ):
+			/* First byte is the order count minus one. */
+			if ( dumbfile_seek( f, c->offset, DFS_SEEK_SET ) ) goto error_usd;
+			sigdata->n_orders = dumbfile_getc( f ) + 1;
+			if ( (unsigned)sigdata->n_orders + 1 > c->size ) goto error_usd;
+			sigdata->order = malloc( sigdata->n_orders );
+			if ( ! sigdata->order ) goto error_usd;
+			dumbfile_getnc( (char *) sigdata->order, sigdata->n_orders, f );
+			break;
+
+		case DUMB_ID( 'P', 'A', 'T', 'T' ):
+			if ( dumbfile_seek( f, c->offset, DFS_SEEK_SET ) ) goto error_usd;
+			o = dumbfile_getc( f );
+			p = dumbfile_igetl( f );
+			/* Never index outside the array sized in pass 1. */
+			if ( o < 0 || o >= sigdata->n_patterns ) goto error_usd;
+			/* Two PATT chunks may carry the same index. Release the earlier
+			 * pattern so the later one replaces it instead of leaking it. */
+			if ( sigdata->pattern[ o ].entry )
+			{
+				free( sigdata->pattern[ o ].entry );
+				sigdata->pattern[ o ].entry = NULL;
+			}
+			if ( it_riff_am_process_pattern( sigdata->pattern + o, f, p, 0 ) ) goto error_usd;
+			break;
+
+		case DUMB_ID( 'I', 'N', 'S', 'T' ):
+			{
+				IT_SAMPLE * sample;
+				if ( dumbfile_seek( f, c->offset + 1, DFS_SEEK_SET ) ) goto error_usd;
+				o = dumbfile_getc( f );
+				/* Never index outside the array sized in pass 1. */
+				if ( o < 0 || o >= sigdata->n_samples ) goto error_usd;
+				sample = sigdata->sample + o;
+				if ( c->size >= 0x121 )
+				{
+					if ( dumbfile_seek( f, c->offset + 0xE1, DFS_SEEK_SET ) ) goto error_usd;
+					if ( dumbfile_mgetl( f ) == DUMB_ID('S','A','M','P') )
+					{
+						unsigned size = dumbfile_igetl( f );
+						/* Two INST chunks may carry the same index. Release
+						 * the earlier data so it is replaced, not leaked. */
+						if ( sample->data )
+						{
+							free( sample->data );
+							sample->data = NULL;
+						}
+						if ( it_riff_am_process_sample( sample, f, size, 0 ) ) goto error_usd;
+						break;
+					}
+				}
+				/* No SAMP block: keep only the instrument name. */
+				dumbfile_seek( f, c->offset + 2, DFS_SEEK_SET );
+				dumbfile_getnc( (char *) sample->name, 28, f );
+				sample->name[ 28 ] = 0;
+			}
+			break;
+		}
+	}
+
+	_dumb_it_fix_invalid_orders( sigdata );
+
+	return sigdata;
+
+error_usd:
+	_dumb_it_unload_sigdata( sigdata );
+	goto error;
+error_sd:
+	free( sigdata );
+error:
+	return NULL;
+}
+
+/* Build a DUMB_IT_SIGDATA from a parsed RIFF 'AM  ' stream.
+ *
+ * f      - open file that the chunk offsets stored in `stream` refer to.
+ * stream - parsed RIFF tree; its type must be DUMB_ID('A','M',' ',' ').
+ *
+ * Returns the new sigdata, or NULL when an argument is NULL, the stream type
+ * is wrong, a validation check fails, a seek fails or memory runs out.
+ *
+ * The chunk list is walked three times:
+ *   1. validate the chunks and count patterns and samples,
+ *   2. read the INIT chunk (title, flags, speed, tempo, channel setup),
+ *   3. read the order list, the pattern data and the instrument data.
+ *
+ * Error labels: error_sd is used while only the sigdata struct itself has
+ * been allocated; error_usd is used once its pointer members have been
+ * initialised and hands the structure to _dumb_it_unload_sigdata().
+ */
+static DUMB_IT_SIGDATA *it_riff_am_load_sigdata( DUMBFILE * f, struct riff * stream )
+{
+	DUMB_IT_SIGDATA *sigdata;
+
+	int n, o, p, found;
+
+	if ( ! f || ! stream ) goto error;
+
+	if ( stream->type != DUMB_ID( 'A', 'M', ' ', ' ' ) ) goto error;
+
+	sigdata = malloc(sizeof(*sigdata));
+	if ( ! sigdata ) goto error;
+
+	sigdata->n_patterns = 0;
+	sigdata->n_samples = 0;
+	sigdata->name[0] = 0;
+
+	/* Bit 0 is set once an INIT chunk was seen, bit 1 once an ORDR chunk was. */
+	found = 0;
+
+	/* Pass 1: validation and counting. */
+	for ( n = 0; (unsigned)n < stream->chunk_count; ++n )
+	{
+		struct riff_chunk * c = stream->chunks + n;
+		switch( c->type )
+		{
+		case DUMB_ID( 'I' ,'N' ,'I' ,'T' ):
+			/* initialization data: exactly one, at least 0x48 bytes */
+			if ( ( found & 1 ) || ( c->size < 0x48 ) ) goto error_sd;
+			found |= 1;
+			break;
+
+		case DUMB_ID( 'O', 'R', 'D', 'R' ):
+			/* order list: exactly one, must not be empty */
+			if ( ( found & 2 ) || ( c->size < 1 ) ) goto error_sd;
+			found |= 2;
+			break;
+
+		case DUMB_ID( 'P', 'A', 'T', 'T' ):
+			if ( dumbfile_seek( f, c->offset, DFS_SEEK_SET ) ) goto error_sd;
+			/* First byte is the pattern index; track the highest one. */
+			o = dumbfile_getc( f );
+			if ( o >= sigdata->n_patterns ) sigdata->n_patterns = o + 1;
+			/* Declared data length plus the 5 header bytes must fit the chunk. */
+			o = dumbfile_igetl( f );
+			if ( (unsigned)o + 5 > c->size ) goto error_sd;
+			break;
+
+		case DUMB_ID( 'R', 'I', 'F', 'F' ):
+			{
+				struct riff * str = c->nested;
+				switch ( str->type )
+				{
+				case DUMB_ID( 'A', 'I', ' ', ' ' ):
+					for ( o = 0; (unsigned)o < str->chunk_count; ++o )
+					{
+						struct riff_chunk * chk = str->chunks + o;
+						switch( chk->type )
+						{
+						case DUMB_ID( 'I', 'N', 'S', 'T' ):
+							{
+								struct riff * temp;
+								unsigned size;
+								unsigned sample_found;
+								if ( dumbfile_seek( f, chk->offset, DFS_SEEK_SET ) ) goto error_sd;
+								size = dumbfile_igetl( f );
+								if ( size < 0x142 ) goto error_sd;
+								sample_found = 0;
+								dumbfile_skip( f, 1 );
+								/* Instrument index; track the highest one. */
+								p = dumbfile_getc( f );
+								if ( p >= sigdata->n_samples ) sigdata->n_samples = p + 1;
+								/* The bytes after the instrument header are parsed as a nested RIFF. */
+								temp = riff_parse( f, chk->offset + 4 + size, chk->size - size - 4, 1 );
+								if ( temp )
+								{
+									if ( temp->type == DUMB_ID( 'A', 'S', ' ', ' ' ) )
+									{
+										for ( p = 0; (unsigned)p < temp->chunk_count; ++p )
+										{
+											if ( temp->chunks[ p ].type == DUMB_ID( 'S', 'A', 'M', 'P' ) )
+											{
+												/* At most one SAMP chunk per instrument. */
+												if ( sample_found )
+												{
+													riff_free( temp );
+													goto error_sd;
+												}
+												sample_found = 1;
+											}
+										}
+									}
+									riff_free( temp );
+								}
+							}
+						}
+					}
+				}
+			}
+			break;
+		}
+	}
+
+	/* Both INIT and ORDR are mandatory, as is at least one sample and pattern. */
+	if ( found != 3 || !sigdata->n_samples || !sigdata->n_patterns ) goto error_sd;
+
+	if ( sigdata->n_samples > 255 || sigdata->n_patterns > 255 ) goto error_sd;
+
+	/* From here on every pointer member is initialised, so error_usd is used. */
+	sigdata->song_message = NULL;
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->sample = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->mixing_volume = 48;
+	sigdata->pan_separation = 128;
+
+	sigdata->n_instruments = 0;
+	sigdata->n_orders = 0;
+	sigdata->restart_position = 0;
+
+	memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS);
+
+	/* Default panning in groups of four channels: left, right, right, left. */
+	for (n = 0; n < DUMB_IT_N_CHANNELS; n += 4) {
+		int sep = 32 * dumb_it_default_panning_separation / 100;
+		sigdata->channel_pan[n  ] = 32 - sep;
+		sigdata->channel_pan[n+1] = 32 + sep;
+		sigdata->channel_pan[n+2] = 32 + sep;
+		sigdata->channel_pan[n+3] = 32 - sep;
+	}
+
+	/* Pass 2: the INIT chunk. */
+	for ( n = 0; (unsigned)n < stream->chunk_count; ++n )
+	{
+		struct riff_chunk * c = stream->chunks + n;
+		switch ( c->type )
+		{
+		case DUMB_ID( 'I', 'N', 'I', 'T' ):
+			if ( dumbfile_seek( f, c->offset, DFS_SEEK_SET ) ) goto error_usd;
+			dumbfile_getnc( (char *) sigdata->name, 64, f );
+			sigdata->name[ 64 ] = 0;
+			sigdata->flags = IT_STEREO | IT_OLD_EFFECTS | IT_COMPATIBLE_GXX | IT_WAS_AN_S3M;
+			/* Flag byte: bit 0 clear selects linear slides, bit 1 is required. */
+			o = dumbfile_getc( f );
+			if ( ! ( o & 1 ) ) sigdata->flags |= IT_LINEAR_SLIDES;
+			if ( ( o & ~3 ) || ! ( o & 2 ) ) goto error_usd; // unknown flags
+			sigdata->n_pchannels = dumbfile_getc( f );
+			sigdata->speed = dumbfile_getc( f );
+			sigdata->tempo = dumbfile_getc( f );
+
+			dumbfile_skip( f, 4 );
+
+			sigdata->global_volume = dumbfile_getc( f );
+
+			/* The channel count indexes channel_pan / channel_volume below,
+			 * which this function fills for DUMB_IT_N_CHANNELS entries only. */
+			if ( sigdata->n_pchannels > DUMB_IT_N_CHANNELS ) goto error_usd;
+
+			if ( c->size < 0x48 + (unsigned)sigdata->n_pchannels ) goto error_usd;
+
+			/* One byte per channel: up to 128 is a pan position (halved),
+			 * anything larger silences the channel. */
+			for ( o = 0; o < sigdata->n_pchannels; ++o )
+			{
+				p = dumbfile_getc( f );
+				if ( p <= 128 )
+				{
+					sigdata->channel_pan[ o ] = p / 2;
+				}
+				else
+				{
+					sigdata->channel_volume[ o ] = 0;
+				}
+			}
+			break;
+		}
+	}
+
+	sigdata->pattern = malloc( sigdata->n_patterns * sizeof( *sigdata->pattern ) );
+	if ( ! sigdata->pattern ) goto error_usd;
+	for ( n = 0; n < sigdata->n_patterns; ++n )
+		sigdata->pattern[ n ].entry = NULL;
+
+	sigdata->sample = malloc( sigdata->n_samples * sizeof( *sigdata->sample ) );
+	if ( ! sigdata->sample ) goto error_usd;
+	for ( n = 0; n < sigdata->n_samples; ++n )
+	{
+		IT_SAMPLE * sample = sigdata->sample + n;
+		sample->data = NULL;
+		sample->flags = 0;
+		sample->name[ 0 ] = 0;
+	}
+
+	/* Pass 3: orders, patterns and instruments. */
+	for ( n = 0; (unsigned)n < stream->chunk_count; ++n )
+	{
+		struct riff_chunk * c = stream->chunks + n;
+		switch ( c->type )
+		{
+		case DUMB_ID( 'O', 'R', 'D', 'R' ):
+			if ( dumbfile_seek( f, c->offset, DFS_SEEK_SET ) ) goto error_usd;
+			/* The stored byte is the order count minus one. */
+			sigdata->n_orders = dumbfile_getc( f ) + 1;
+			if ( (unsigned)sigdata->n_orders + 1 > c->size ) goto error_usd;
+			sigdata->order = malloc( sigdata->n_orders );
+			if ( ! sigdata->order ) goto error_usd;
+			dumbfile_getnc( (char *) sigdata->order, sigdata->n_orders, f );
+			break;
+
+		case DUMB_ID( 'P', 'A', 'T', 'T' ):
+			if ( dumbfile_seek( f, c->offset, DFS_SEEK_SET ) ) goto error_usd;
+			o = dumbfile_getc( f );
+			/* Never index outside the pattern array sized in pass 1. */
+			if ( o < 0 || o >= sigdata->n_patterns ) goto error_usd;
+			p = dumbfile_igetl( f );
+			if ( it_riff_am_process_pattern( sigdata->pattern + o, f, p, 1 ) ) goto error_usd;
+			break;
+
+		case DUMB_ID( 'R', 'I', 'F', 'F' ):
+			{
+				struct riff * str = c->nested;
+				switch ( str->type )
+				{
+				case DUMB_ID('A', 'I', ' ', ' '):
+					for ( o = 0; (unsigned)o < str->chunk_count; ++o )
+					{
+						struct riff_chunk * chk = str->chunks + o;
+						switch( chk->type )
+						{
+						case DUMB_ID( 'I', 'N', 'S', 'T' ):
+							{
+								struct riff * temp;
+								unsigned size;
+								unsigned sample_found;
+								IT_SAMPLE * sample;
+								if ( dumbfile_seek( f, chk->offset, DFS_SEEK_SET ) ) goto error_usd;
+								size = dumbfile_igetl( f );
+								dumbfile_skip( f, 1 );
+								p = dumbfile_getc( f );
+								/* Never index outside the sample array sized in pass 1. */
+								if ( p < 0 || p >= sigdata->n_samples ) goto error_usd;
+								temp = riff_parse( f, chk->offset + 4 + size, chk->size - size - 4, 1 );
+								sample_found = 0;
+								sample = sigdata->sample + p;
+								if ( temp )
+								{
+									if ( temp->type == DUMB_ID( 'A', 'S', ' ', ' ' ) )
+									{
+										for ( p = 0; (unsigned)p < temp->chunk_count; ++p )
+										{
+											struct riff_chunk * smp_chunk = temp->chunks + p;
+											if ( smp_chunk->type == DUMB_ID( 'S', 'A', 'M', 'P' ) )
+											{
+												if ( sample_found )
+												{
+													riff_free( temp );
+													goto error_usd;
+												}
+												/* Position the file on the SAMP chunk; only a
+												 * failed seek aborts the load. */
+												if ( dumbfile_seek( f, smp_chunk->offset, DFS_SEEK_SET ) )
+												{
+													riff_free( temp );
+													goto error_usd;
+												}
+												if ( it_riff_am_process_sample( sample, f, smp_chunk->size, 1 ) )
+												{
+													riff_free( temp );
+													goto error_usd;
+												}
+												sample_found = 1;
+											}
+										}
+									}
+									riff_free( temp );
+								}
+								/* No sample data: keep only the 32-byte name that
+								 * starts 6 bytes into the INST chunk. */
+								if ( ! sample_found )
+								{
+									dumbfile_seek( f, chk->offset + 6, DFS_SEEK_SET );
+									dumbfile_getnc( (char *) sample->name, 32, f );
+									sample->name[ 32 ] = 0;
+								}
+							}
+						}
+					}
+				}
+			}
+			break;
+		}
+	}
+
+	_dumb_it_fix_invalid_orders( sigdata );
+
+	return sigdata;
+
+error_usd:
+	_dumb_it_unload_sigdata( sigdata );
+	goto error;
+error_sd:
+	free( sigdata );
+error:
+	return NULL;
+}
+
+/* Wrap a RIFF AMFF module in a DUH.
+ *
+ * Loads the signal data through it_riff_amff_load_sigdata() and, on success,
+ * passes it to make_duh() with length 0 and two tags: TITLE (the module name
+ * stored in the sigdata) and FORMAT ("RIFF AMFF").
+ * Returns NULL when the signal data could not be loaded.
+ */
+DUH *dumb_read_riff_amff( DUMBFILE * f, struct riff * stream )
+{
+	DUH_SIGTYPE_DESC *it_desc = &_dumb_sigtype_it;
+	sigdata_t *loaded = it_riff_amff_load_sigdata( f, stream );
+	const char *tags[2][2];
+
+	if ( loaded == NULL )
+		return NULL;
+
+	tags[0][0] = "TITLE";
+	tags[0][1] = (const char *) ( (DUMB_IT_SIGDATA *) loaded )->name;
+	tags[1][0] = "FORMAT";
+	tags[1][1] = "RIFF AMFF";
+
+	/* Length is reported as 0; no checkpoints are built here. */
+	return make_duh( 0L, 2, ( const char * const (*) [ 2 ] ) tags, 1, &it_desc, &loaded );
+}
+
+/* Wrap a RIFF AM module in a DUH.
+ *
+ * Loads the signal data through it_riff_am_load_sigdata() and, on success,
+ * passes it to make_duh() with length -1 and two tags: TITLE (the module
+ * name stored in the sigdata) and FORMAT ("RIFF AM").
+ * Returns NULL when the signal data could not be loaded.
+ */
+DUH *dumb_read_riff_am( DUMBFILE * f, struct riff * stream )
+{
+	DUH_SIGTYPE_DESC *it_desc = &_dumb_sigtype_it;
+	sigdata_t *loaded = it_riff_am_load_sigdata( f, stream );
+	const char *tags[2][2];
+
+	if ( loaded == NULL )
+		return NULL;
+
+	tags[0][0] = "TITLE";
+	tags[0][1] = (const char *) ( (DUMB_IT_SIGDATA *) loaded )->name;
+	tags[1][0] = "FORMAT";
+	tags[1][1] = "RIFF AM";
+
+	return make_duh( -1, 2, ( const char * const (*) [ 2 ] ) tags, 1, &it_desc, &loaded );
+}
diff --git a/libraries/dumb/src/it/readamf.c b/libraries/dumb/src/it/readamf.c
new file mode 100644
index 000000000..7b72467e0
--- /dev/null
+++ b/libraries/dumb/src/it/readamf.c
@@ -0,0 +1,559 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readamf.c - Code to read a DSMI AMF module from    / / \  \
+ *             an open file.                         | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Chris Moeller.                                  \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* Decode one AMF track into a column of a row-major pattern grid.
+ *
+ * entry_table - first cell of the column to fill; the cell for row r is
+ *               entry_table[r * channels].
+ * track       - 3-byte little-endian event count followed by that many
+ *               3-byte events (row, command, argument).
+ * rows        - number of rows in the grid; decoding stops at the first
+ *               event whose row is not below this value.
+ * channels    - number of columns in the grid (the row stride).
+ */
+static void it_amf_process_track( IT_ENTRY *entry_table, unsigned char *track, int rows, int channels )
+{
+	/* Instrument last selected by a 0x80 command; applied to later notes. */
+	int last_instrument = 0;
+	int tracksize = track[ 0 ] + ( track[ 1 ] << 8 ) + ( track[ 2 ] << 16 );
+	track += 3;
+	while ( tracksize-- ) {
+		unsigned int row = track[ 0 ];
+		unsigned int command = track[ 1 ];
+		unsigned int argument = track[ 2 ];
+		IT_ENTRY * entry = entry_table + row * channels;
+		if ( row >= ( unsigned int ) rows ) break;
+		if ( command < 0x7F ) {
+			/* Note event: command is the note, argument goes to the volume column. */
+			entry->mask |= IT_ENTRY_NOTE | IT_ENTRY_INSTRUMENT | IT_ENTRY_VOLPAN;
+			entry->note = command;
+			if ( ! entry->instrument ) entry->instrument = last_instrument;
+			entry->volpan = argument;
+		}
+		else if ( command == 0x7F ) {
+			/* Copy the cell found a signed number of rows away, if it is in range. */
+			signed char row_delta = ( signed char ) argument;
+			int row_source = ( int ) row + ( int ) row_delta;
+			if ( row_source >= 0 && row_source < ( int ) rows ) {
+				*entry = entry_table[ row_source * channels ];
+			}
+		}
+		else if ( command == 0x80 ) {
+			/* Instrument change; stored one-based. */
+			entry->mask |= IT_ENTRY_INSTRUMENT;
+			last_instrument = argument + 1;
+			entry->instrument = last_instrument;
+		}
+		else if ( command == 0x83 ) {
+			/* Volume column only. */
+			entry->mask |= IT_ENTRY_VOLPAN;
+			entry->volpan = argument;
+		}
+		else {
+			/* Everything else is an effect numbered by the low seven bits. */
+			unsigned int effect = command & 0x7F;
+			unsigned int effectvalue = argument;
+			switch (effect) {
+				case 0x01: effect = IT_SET_SPEED; break;
+
+				/* 0x02, 0x0A and 0x0B deliberately fall through so that they
+				 * share the parameter conversion: a negative argument becomes
+				 * the low nibble, a positive one the high nibble.
+				 * NOTE(review): the `effect == 0x0A / 0x0B` guards assume the
+				 * IT_* values assigned above them are not 0x0A or 0x0B - confirm. */
+				case 0x02: effect = IT_VOLUME_SLIDE;
+				case 0x0A: if ( effect == 0x0A ) effect = IT_VOLSLIDE_TONEPORTA;
+				case 0x0B: if ( effect == 0x0B ) effect = IT_VOLSLIDE_VIBRATO;
+					if ( effectvalue & 0x80 ) effectvalue = ( -( signed char ) effectvalue ) & 0x0F;
+					else effectvalue = ( effectvalue & 0x0F ) << 4;
+					break;
+
+				/* Signed portamento: negative slides up by the magnitude. */
+				case 0x04:
+					if ( effectvalue & 0x80 ) {
+						effect = IT_PORTAMENTO_UP;
+						effectvalue = ( -( signed char ) effectvalue ) & 0x7F;
+					}
+					else {
+						effect = IT_PORTAMENTO_DOWN;
+					}
+					break;
+
+				case 0x06: effect = IT_TONE_PORTAMENTO; break;
+
+				case 0x07: effect = IT_TREMOR; break;
+
+				case 0x08: effect = IT_ARPEGGIO; break;
+
+				case 0x09: effect = IT_VIBRATO; break;
+
+				case 0x0C: effect = IT_BREAK_TO_ROW; break;
+
+				case 0x0D: effect = IT_JUMP_TO_ORDER; break;
+
+				case 0x0F: effect = IT_RETRIGGER_NOTE; break;
+
+				case 0x10: effect = IT_SET_SAMPLE_OFFSET; break;
+
+				/* Volume slide whose other nibble is forced to 0xF; a zero
+				 * argument drops the effect. */
+				case 0x11:
+					if ( effectvalue ) {
+						effect = IT_VOLUME_SLIDE;
+						if ( effectvalue & 0x80 )
+							effectvalue = 0xF0 | ( ( -( signed char ) effectvalue ) & 0x0F );
+						else
+							effectvalue = 0x0F | ( ( effectvalue & 0x0F ) << 4 );
+					}
+					else
+						effect = 0;
+					break;
+
+				/* Portamento with a fixed high nibble (0xF0 for 0x12, 0xE0 for
+				 * 0x16); a zero argument drops the effect. */
+				case 0x12:
+				case 0x16:
+					if ( effectvalue ) {
+						int mask = ( effect == 0x16 ) ? 0xE0 : 0xF0;
+						effect = ( effectvalue & 0x80 ) ? IT_PORTAMENTO_UP : IT_PORTAMENTO_DOWN;
+						if ( effectvalue & 0x80 )
+							effectvalue = mask | ( ( -( signed char ) effectvalue ) & 0x0F );
+						else
+							effectvalue = mask | ( effectvalue & 0x0F );
+                    }
+					else
+						effect = 0;
+					break;
+
+				case 0x13:
+					effect = IT_S;
+					effectvalue = EFFECT_VALUE( IT_S_NOTE_DELAY, effectvalue & 0x0F );
+					break;
+
+				case 0x14:
+					effect = IT_S;
+					effectvalue = EFFECT_VALUE( IT_S_DELAYED_NOTE_CUT, effectvalue & 0x0F );
+					break;
+
+				case 0x15: effect = IT_SET_SONG_TEMPO; break;
+
+				/* Panning: if the effect column of this cell is already taken,
+				 * store the value in the volume/pan column when that is still
+				 * free; otherwise emit a set-panning effect. */
+				case 0x17:
+					effectvalue = ( effectvalue + 64 ) & 0x7F;
+					if ( entry->mask & IT_ENTRY_EFFECT ) {
+						if ( !( entry->mask & IT_ENTRY_VOLPAN ) ) {
+							entry->mask |= IT_ENTRY_VOLPAN;
+							entry->volpan = ( effectvalue / 2 ) + 128;
+						}
+						effect = 0;
+					}
+					else {
+						effect = IT_SET_PANNING;
+					}
+					break;
+
+				/* Unrecognised effects are ignored. */
+				default: effect = effectvalue = 0;
+			}
+			/* effect == 0 means there is nothing to store for this event. */
+			if ( effect ) {
+				entry->mask |= IT_ENTRY_EFFECT;
+				entry->effect = effect;
+				entry->effectvalue = effectvalue;
+			}
+		}
+		track += 3;
+	}
+}
+
+/* Pack a row-major grid of entries into an IT_PATTERN.
+ *
+ * pattern     - receives n_rows, n_entries and a newly malloc'ed entry list.
+ * entry_table - rows * channels cells; a cell is used when its mask is
+ *               non-zero.
+ * rows        - number of rows in the grid.
+ * channels    - number of cells per row.
+ *
+ * The entry list holds every used cell (tagged with its channel) in row
+ * order, with one end-of-row marker after each row.
+ * Returns 0 on success, -1 when the allocation fails.
+ */
+static int it_amf_process_pattern( IT_PATTERN *pattern, IT_ENTRY *entry_table, int rows, int channels )
+{
+	const int cells = channels * rows;
+	int total = rows; /* one end-of-row marker per row */
+	int cell, row, ch;
+	IT_ENTRY *src;
+	IT_ENTRY *dst;
+
+	pattern->n_rows = rows;
+
+	for ( cell = 0; cell < cells; cell++ )
+		if ( entry_table[ cell ].mask )
+			total++;
+
+	pattern->n_entries = total;
+
+	dst = malloc( total * sizeof( IT_ENTRY ) );
+	pattern->entry = dst;
+	if ( dst == NULL )
+		return -1;
+
+	src = entry_table;
+	for ( row = 0; row < rows; row++ ) {
+		for ( ch = 0; ch < channels; ch++, src++ ) {
+			if ( !src->mask )
+				continue;
+			*dst = *src;
+			dst->channel = ch;
+			dst++;
+		}
+		IT_SET_END_ROW( dst );
+		dst++;
+	}
+
+	return 0;
+}
+
+/* Read one AMF sample header from f into *sample.
+ *
+ * sample - receives name, filename, length, speed, volumes, loop points and
+ *          flags.
+ * f      - file positioned at the start of the header.
+ * offset - receives the 32-bit value stored right after the filename.
+ * ver    - file version; 11 and later store both loop points as 32-bit
+ *          values, earlier versions store a 16-bit loop start only and the
+ *          loop end is taken to be the sample length.
+ *
+ * Returns 0 straight away when the length is not positive (flags cleared);
+ * otherwise returns dumbfile_error(f).
+ */
+static int it_amf_read_sample_header( IT_SAMPLE *sample, DUMBFILE *f, int * offset, int ver )
+{
+	/* Leading type byte; only the value 1 marks the sample as existing. */
+	const int type = dumbfile_getc( f );
+
+	dumbfile_getnc( (char *) sample->name, 32, f );
+	sample->name[32] = 0;
+
+	dumbfile_getnc( (char *) sample->filename, 13, f );
+	sample->filename[13] = 0;
+
+	*offset = dumbfile_igetl( f );
+	sample->length = dumbfile_igetl( f );
+	sample->C5_speed = dumbfile_igetw( f );
+
+	sample->default_volume = dumbfile_getc( f );
+	if ( sample->default_volume > 64 )
+		sample->default_volume = 64;
+	sample->global_volume = 64;
+
+	if ( ver < 11 ) {
+		sample->loop_start = dumbfile_igetw( f );
+		sample->loop_end = sample->length;
+	} else {
+		sample->loop_start = dumbfile_igetl( f );
+		sample->loop_end = dumbfile_igetl( f );
+	}
+
+	sample->flags = 0;
+	if ( sample->length <= 0 )
+		return 0;
+
+	if ( type == 1 )
+		sample->flags = IT_SAMPLE_EXISTS;
+
+	/* A loop counts only if it is longer than two frames and ends inside the sample. */
+	if ( sample->loop_end > sample->loop_start + 2 && sample->loop_end <= sample->length )
+		sample->flags |= IT_SAMPLE_LOOP;
+
+	sample->default_pan = 0;
+	sample->finetune = 0;
+
+	sample->vibrato_speed = 0;
+	sample->vibrato_depth = 0;
+	sample->vibrato_rate = 0;
+	sample->vibrato_waveform = 0;
+	sample->max_resampling_quality = -1;
+
+	return dumbfile_error(f);
+}
+
+
+
+/* Read the data of one sample from f.
+ *
+ * Allocates sample->length bytes for sample->data, reads as many bytes as
+ * the file provides, flips the top bit of every byte that was read and
+ * zero-fills whatever is missing.
+ *
+ * Returns 0 on success - including a short read, because the last sample of
+ * a file is sometimes truncated - and -1 when the length is negative or the
+ * allocation fails.
+ */
+static int it_amf_read_sample_data( IT_SAMPLE *sample, DUMBFILE *f )
+{
+	long i, read_length = 0;
+	char *data;
+
+	if ( sample->length < 0 )
+		return -1;
+
+	sample->data = malloc( sample->length );
+
+	if ( !sample->data )
+		return -1;
+
+	data = sample->data;
+
+	if ( sample->length )
+		read_length = dumbfile_getnc( data, sample->length, f );
+
+	/* A negative result means nothing was read; without this clamp the
+	 * zero-fill loop below would start before the buffer. */
+	if ( read_length < 0 )
+		read_length = 0;
+
+	for ( i = 0; i < read_length; i++ ) {
+		data[ i ] ^= 0x80;
+	}
+
+	for ( i = read_length; i < sample->length; i++ ) {
+		data[ i ] = 0;
+	}
+
+	return 0; /* Sometimes the last sample is truncated :( */
+}
+
+/* Read a DSMI AMF module from f into a freshly allocated DUMB_IT_SIGDATA.
+ *
+ * f       - file positioned at the "AMF" signature.
+ * version - receives the version byte that follows the signature.
+ *
+ * Returns the sigdata, or NULL when the signature or version (10..14) is
+ * not accepted, the header values are out of range, a read fails or memory
+ * runs out.
+ */
+static DUMB_IT_SIGDATA *it_amf_load_sigdata(DUMBFILE *f, int * version)
+{
+	DUMB_IT_SIGDATA *sigdata;
+	int i, j, ver, ntracks, realntracks, nchannels;
+
+	/* Per-sample value returned by it_amf_read_sample_header() through its
+	 * offset argument, and the largest such value. */
+	int maxsampleseekpos = 0;
+	int sampleseekpos[256];
+
+	/* orderstotracks: one track number per (order, channel) pair.
+	 * trackmap: maps those track numbers to stored tracks.
+	 * tracksize: number of rows of each order's pattern. */
+	unsigned short *orderstotracks;
+	unsigned short *trackmap;
+	unsigned int tracksize[256];
+
+	unsigned char **track;
+
+	static const char sig[] = "AMF";
+
+	char signature [3];
+
+	if ( dumbfile_getnc( signature, 3, f ) != 3 ||
+		memcmp( signature, sig, 3 ) ) {
+		return NULL;
+	}
+
+	*version = ver = dumbfile_getc( f );
+	if ( ver < 10 || ver > 14) {
+		return NULL;
+	}
+
+	sigdata = malloc(sizeof(*sigdata));
+	if (!sigdata) {
+		return NULL;
+	}
+
+	/* Header: 32-byte title, sample count, order count, 16-bit track count,
+	 * channel count. */
+    dumbfile_getnc( (char *) sigdata->name, 32, f );
+	sigdata->name[ 32 ] = 0;
+	sigdata->n_samples = dumbfile_getc( f );
+	sigdata->n_orders = dumbfile_getc( f );
+	ntracks = dumbfile_igetw( f );
+	nchannels = dumbfile_getc( f );
+
+	if ( dumbfile_error( f ) ||
+		sigdata->n_samples < 1 || sigdata->n_samples > 255 ||
+		sigdata->n_orders < 1 || sigdata->n_orders > 255 ||
+		! ntracks ||
+		nchannels < 1 || nchannels > 32 ) {
+		free( sigdata );
+		return NULL;
+	}
+    
+    sigdata->n_pchannels = nchannels;
+
+	memset( sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS );
+
+	if ( ver >= 11 ) {
+		/* Pan table of signed bytes: 32 entries from version 13 on, 16
+		 * before.  This inner nchannels shadows the channel count read
+		 * above. */
+		int nchannels = ( ver >= 13 ) ? 32 : 16;
+		for ( i = 0; i < nchannels; i++ ) {
+			signed char panpos = dumbfile_getc( f );
+			int pan = ( panpos + 64 ) / 2;
+			if ( pan < 0 ) pan = 0;
+			else if ( pan > 64 ) pan = IT_SURROUND;
+			sigdata->channel_pan[ i ] = pan;
+		}
+	}
+	else {
+		/* Version 10: 16 bytes whose bit 0 picks one of the two default
+		 * pan positions. */
+		int sep = 32 * dumb_it_default_panning_separation / 100;
+		for ( i = 0; i < 16; i++ ) {
+			sigdata->channel_pan[ i ] = ( dumbfile_getc( f ) & 1 ) ? 32 - sep : 32 + sep;
+		}
+	}
+
+	/* Defaults; version 13 and later store a tempo byte (used when >= 32)
+	 * and a speed byte (used when <= 32). */
+	sigdata->tempo = 125;
+	sigdata->speed = 6;
+	if ( ver >= 13 ) {
+		i = dumbfile_getc( f );
+		if ( i >= 32 ) sigdata->tempo = i;
+		i = dumbfile_getc( f );
+		if ( i <= 32 ) sigdata->speed = i;
+	}
+
+	/* Until the remaining pointer members of sigdata are set to NULL further
+	 * down, failures are cleaned up by hand instead of through the labels
+	 * at the end of the function. */
+	sigdata->order = malloc( sigdata->n_orders );
+	if ( !sigdata->order ) {
+		free( sigdata );
+		return NULL;
+	}
+
+	orderstotracks = malloc( sigdata->n_orders * nchannels * sizeof( unsigned short ) );
+	if ( !orderstotracks ) {
+		free( sigdata->order );
+		free( sigdata );
+		return NULL;
+	}
+
+	/* Order i plays pattern i.  Each order lists one track number per
+	 * channel; version 14 also stores the row count, otherwise 64 is used. */
+	for ( i = 0; i < sigdata->n_orders; i++ ) {
+		sigdata->order[ i ] = i;
+		tracksize[ i ] = 64;
+		if ( ver >= 14 ) {
+			tracksize[ i ] = dumbfile_igetw( f );
+		}
+		for ( j = 0; j < nchannels; j++ ) {
+			orderstotracks[ i * nchannels + j ] = dumbfile_igetw( f );
+		}
+	}
+
+	if ( dumbfile_error( f ) ) {
+		free( orderstotracks );
+		free( sigdata->order );
+		free( sigdata );
+		return NULL;
+	}
+
+	sigdata->sample = malloc( sigdata->n_samples * sizeof( *sigdata->sample ) );
+	if ( !sigdata->sample ) {
+		free( orderstotracks );
+		free( sigdata->order );
+		free( sigdata );
+		return NULL;
+	}
+
+	sigdata->restart_position = 0;
+
+	sigdata->song_message = NULL;
+	sigdata->instrument = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->n_instruments = 0;
+
+	/* Clear every data pointer first so a later failure never leaves an
+	 * uninitialised pointer in the sample array. */
+	for ( i = 0; i < sigdata->n_samples; ++i )
+		sigdata->sample[i].data = NULL;
+
+	for ( i = 0; i < sigdata->n_samples; ++i ) {
+		int offset;
+		if ( it_amf_read_sample_header( &sigdata->sample[i], f, &offset, ver ) ) {
+			goto error_ott;
+		}
+		sampleseekpos[ i ] = offset;
+		if ( offset > maxsampleseekpos ) maxsampleseekpos = offset;
+	}
+
+	sigdata->n_patterns = sigdata->n_orders;
+
+	sigdata->pattern = malloc( sigdata->n_patterns * sizeof( *sigdata->pattern ) );
+	if ( !sigdata->pattern ) {
+		goto error_ott;
+	}
+	for (i = 0; i < sigdata->n_patterns; ++i)
+		sigdata->pattern[i].entry = NULL;
+
+	trackmap = malloc( ntracks * sizeof( unsigned short ) );
+	if ( !trackmap ) {
+		goto error_ott;
+	}
+
+	/* NOTE(review): the map is read as raw bytes into unsigned shorts, so the
+	 * values are interpreted in host byte order - confirm this is intended. */
+	if ( dumbfile_getnc( ( char * ) trackmap, ntracks * sizeof( unsigned short ), f ) != (long)(ntracks * sizeof( unsigned short )) ) {
+		goto error_tm;
+	}
+
+	/* The number of stored tracks is the largest value in the map. */
+	realntracks = 0;
+
+	for ( i = 0; i < ntracks; i++ ) {
+		if ( trackmap[ i ] > realntracks ) realntracks = trackmap[ i ];
+	}
+
+	track = calloc( realntracks, sizeof( unsigned char * ) );
+	if ( !track ) {
+		goto error_tm;
+	}
+
+	/* Each stored track is a 24-bit event count followed by 3 bytes per
+	 * event.  It is kept in memory with the count re-encoded in the first
+	 * three bytes, the layout it_amf_process_track() reads. */
+	for ( i = 0; i < realntracks; i++ ) {
+		int tracksize = dumbfile_igetw( f );
+		tracksize += dumbfile_getc( f ) << 16;
+		track[ i ] = malloc( tracksize * 3 + 3 );
+		if ( !track[ i ] ) {
+			goto error_all;
+		}
+		track[ i ][ 0 ] = tracksize & 255;
+		track[ i ][ 1 ] = ( tracksize >> 8 ) & 255;
+		track[ i ][ 2 ] = ( tracksize >> 16 ) & 255;
+        if ( dumbfile_getnc( (char *) track[ i ] + 3, tracksize * 3, f ) != tracksize * 3 ) {
+			goto error_all;
+		}
+	}
+
+	/* Sample data is read in ascending order of the recorded position; for
+	 * each position only the first sample that carries it is loaded. */
+	for ( i = 1; i <= maxsampleseekpos; i++ ) {
+		for ( j = 0; j < sigdata->n_samples; j++ ) {
+			if ( sampleseekpos[ j ] == i ) {
+				if ( it_amf_read_sample_data( &sigdata->sample[ j ], f ) ) {
+					goto error_all;
+				}
+				break;
+			}
+		}
+	}
+
+	/* Process tracks into patterns */
+	for ( i = 0; i < sigdata->n_patterns; i++ ) {
+		/* Zeroed scratch grid: tracksize[i] rows by nchannels columns. */
+		IT_ENTRY * entry_table = calloc( tracksize[ i ] * nchannels, sizeof( IT_ENTRY ) );
+		if ( !entry_table ) {
+			goto error_all;
+		}
+		for ( j = 0; j < nchannels; j++ ) {
+			/* Track numbers and map values are one-based; 0 means none. */
+			int ntrack = orderstotracks[ i * nchannels + j ];
+			if ( ntrack && ntrack <= ntracks ) {
+				int realtrack = trackmap[ ntrack - 1 ];
+				if ( realtrack ) {
+					realtrack--;
+					if ( realtrack < realntracks && track[ realtrack ] ) {
+						it_amf_process_track( entry_table + j, track[ realtrack ], tracksize[ i ], nchannels );
+					}
+				}
+			}
+		}
+		if ( it_amf_process_pattern( &sigdata->pattern[ i ], entry_table, tracksize[ i ], nchannels ) ) {
+			free( entry_table );
+			goto error_all;
+		}
+		free( entry_table );
+	}
+
+	/* Now let's initialise the remaining variables, and we're done! */
+	sigdata->flags = IT_OLD_EFFECTS | IT_COMPATIBLE_GXX | IT_STEREO | IT_WAS_AN_S3M;
+
+	sigdata->global_volume = 128;
+	sigdata->mixing_volume = 48;
+	sigdata->pan_separation = 128;
+
+	_dumb_it_fix_invalid_orders(sigdata);
+
+	/* The temporary track tables are no longer needed. */
+	for ( i = 0; i < realntracks; i++ ) {
+		if ( track[ i ] ) {
+			free( track[ i ] );
+		}
+	}
+	free( track );
+	free( trackmap );
+	free( orderstotracks );
+
+	return sigdata;
+
+	/* Cleanup ladder: each label releases what was acquired after the label
+	 * below it, then falls through. */
+error_all:
+	for ( i = 0; i < realntracks; i++ ) {
+		if ( track[ i ] ) {
+			free( track[ i ] );
+		}
+	}
+	free( track );
+error_tm:
+	free( trackmap );
+error_ott:
+	free( orderstotracks );
+	_dumb_it_unload_sigdata( sigdata );
+	return NULL;
+}
+
+
+
+/* Read a DSMI AMF module from f and wrap it in a DUH.
+ *
+ * The DUH is created by make_duh() with length -1 and two tags: TITLE (the
+ * module name) and FORMAT ("DSMI AMF v" followed by the version byte shown
+ * as <tens>.<units>).
+ * Returns NULL when it_amf_load_sigdata() fails.
+ */
+DUH *DUMBEXPORT dumb_read_amf_quick(DUMBFILE *f)
+{
+	DUH_SIGTYPE_DESC *it_desc = &_dumb_sigtype_it;
+	sigdata_t *loaded;
+	int ver;
+	const char *tags[2][2];
+	/* Ten-character prefix; the remaining four bytes start out as zero. */
+	char format_name[14] = "DSMI AMF v";
+
+	loaded = it_amf_load_sigdata(f, &ver);
+	if (loaded == NULL)
+		return NULL;
+
+	format_name[10] = '0' + ver / 10;
+	format_name[11] = '.';
+	format_name[12] = '0' + ver % 10;
+
+	tags[0][0] = "TITLE";
+	tags[0][1] = (const char *) ((DUMB_IT_SIGDATA *) loaded)->name;
+	tags[1][0] = "FORMAT";
+	tags[1][1] = format_name;
+
+	return make_duh(-1, 2, (const char *const (*)[2])tags, 1, &it_desc, &loaded);
+}
diff --git a/libraries/dumb/src/it/readamf2.c b/libraries/dumb/src/it/readamf2.c
new file mode 100644
index 000000000..3c8732227
--- /dev/null
+++ b/libraries/dumb/src/it/readamf2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readamf2.c - Function to read a DSMI AMF module    / / \  \
+ *              from an open file and do an initial  | <  /   \_
+ *              run-through.                         |  \/ /\   /
+ *                                                    \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+
+/* Read a DSMI AMF module with dumb_read_amf_quick(), pass the result to
+ * dumb_it_do_initial_runthrough() and return it.  The result of the quick
+ * reader is forwarded unchecked. */
+DUH *DUMBEXPORT dumb_read_amf(DUMBFILE *f)
+{
+	DUH *module = dumb_read_amf_quick(f);
+
+	dumb_it_do_initial_runthrough(module);
+
+	return module;
+}
diff --git a/libraries/dumb/src/it/readany.c b/libraries/dumb/src/it/readany.c
new file mode 100644
index 000000000..9d90776ff
--- /dev/null
+++ b/libraries/dumb/src/it/readany.c
@@ -0,0 +1,132 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readany.c - Code to detect and read any of the     / / \  \
+ *             module formats supported by DUMB.     | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Chris Moeller.                                  \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "dumb.h"
+
+/* strnicmp is not standard C: alias it to _strnicmp under MSVC and to
+ * strncasecmp elsewhere (pulling in <strings.h> on Unix-like systems). */
+#ifdef _MSC_VER
+	#define strnicmp _strnicmp
+#else
+	#if defined(unix) || defined(__unix__) || defined(__unix)
+		#include <strings.h>
+	#endif
+	#define strnicmp strncasecmp
+#endif
+
+/* Number of leading file bytes buffered for format detection (0x30 = 48). */
+enum { maximum_signature_size = 0x30 };
+
+/* Detect the module format from the first bytes of f and load it with the
+ * matching quick reader.
+ *
+ * f         - file to read; it is rewound to offset 0 after the signature
+ *             bytes have been sampled.
+ * restrict_ - forwarded to dumb_read_mod_quick() when that fallback is used.
+ * subsong   - forwarded to dumb_read_psm_quick().
+ *
+ * The signature tests are tried in a fixed order and the first match picks
+ * the reader.  When no test matches, or the chosen reader returns NULL, the
+ * file is rewound and handed to dumb_read_mod_quick().
+ * Returns the loaded DUH, or NULL when the fallback reader fails too.
+ */
+DUH *DUMBEXPORT dumb_read_any_quick(DUMBFILE *f, int restrict_, int subsong)
+{
+    unsigned char signature[ maximum_signature_size ];
+    unsigned long signature_size;
+    long bytes_read;
+    DUH * duh = NULL;
+
+    /* Keep the count signed until it has been checked: a negative result
+     * must not turn into a huge unsigned length, or the tests below would
+     * inspect bytes that were never read. */
+    bytes_read = dumbfile_getnc( (char *)signature, maximum_signature_size, f );
+    signature_size = ( bytes_read > 0 ) ? (unsigned long)bytes_read : 0;
+    dumbfile_seek( f, 0, DFS_SEEK_SET );
+
+    if (signature_size >= 4 &&
+        signature[0] == 'I' && signature[1] == 'M' &&
+        signature[2] == 'P' && signature[3] == 'M')
+    {
+        duh = dumb_read_it_quick( f );
+    }
+    else if (signature_size >= 17 && !memcmp(signature, "Extended Module: ", 17))
+    {
+        duh = dumb_read_xm_quick( f );
+    }
+    else if (signature_size >= 0x30 &&
+        signature[0x2C] == 'S' && signature[0x2D] == 'C' &&
+        signature[0x2E] == 'R' && signature[0x2F] == 'M')
+    {
+        duh = dumb_read_s3m_quick( f );
+    }
+    else if (signature_size >= 30 &&
+        /*signature[28] == 0x1A &&*/ signature[29] == 2 &&
+        ( ! strnicmp( ( const char * ) signature + 20, "!Scream!", 8 ) ||
+        ! strnicmp( ( const char * ) signature + 20, "BMOD2STM", 8 ) ||
+        ! strnicmp( ( const char * ) signature + 20, "WUZAMOD!", 8 ) ) )
+    {
+        duh = dumb_read_stm_quick( f );
+    }
+    else if (signature_size >= 2 &&
+        ((signature[0] == 0x69 && signature[1] == 0x66) ||
+        (signature[0] == 0x4A && signature[1] == 0x4E)))
+    {
+        duh = dumb_read_669_quick( f );
+    }
+    else if (signature_size >= 0x30 &&
+        signature[0x2C] == 'P' && signature[0x2D] == 'T' &&
+        signature[0x2E] == 'M' && signature[0x2F] == 'F')
+    {
+        duh = dumb_read_ptm_quick( f );
+    }
+    else if (signature_size >= 4 &&
+        signature[0] == 'P' && signature[1] == 'S' &&
+        signature[2] == 'M' && signature[3] == ' ')
+    {
+        duh = dumb_read_psm_quick( f, subsong );
+    }
+    else if (signature_size >= 4 &&
+        signature[0] == 'P' && signature[1] == 'S' &&
+        signature[2] == 'M' && signature[3] == 254)
+    {
+        duh = dumb_read_old_psm_quick( f );
+    }
+    else if (signature_size >= 3 &&
+        signature[0] == 'M' && signature[1] == 'T' &&
+        signature[2] == 'M')
+    {
+        duh = dumb_read_mtm_quick( f );
+    }
+    else if ( signature_size >= 4 &&
+        signature[0] == 'R' && signature[1] == 'I' &&
+        signature[2] == 'F' && signature[3] == 'F')
+    {
+        duh = dumb_read_riff_quick( f );
+    }
+    else if ( signature_size >= 24 &&
+        !memcmp( signature, "ASYLUM Music Format", 19 ) &&
+        !memcmp( signature + 19, " V1.0", 5 ) )
+    {
+        duh = dumb_read_asy_quick( f );
+    }
+    else if ( signature_size >= 3 &&
+        signature[0] == 'A' && signature[1] == 'M' &&
+        signature[2] == 'F')
+    {
+        duh = dumb_read_amf_quick( f );
+    }
+    else if ( signature_size >= 8 &&
+        !memcmp( signature, "OKTASONG", 8 ) )
+    {
+        duh = dumb_read_okt_quick( f );
+    }
+
+    /* Last resort: nothing matched or the matching reader failed. */
+    if ( !duh )
+    {
+        dumbfile_seek( f, 0, DFS_SEEK_SET );
+        duh = dumb_read_mod_quick( f, restrict_ );
+    }
+
+    return duh;
+}
diff --git a/libraries/dumb/src/it/readany2.c b/libraries/dumb/src/it/readany2.c
new file mode 100644
index 000000000..bd0102cab
--- /dev/null
+++ b/libraries/dumb/src/it/readany2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readany2.c - Code to detect and read any of the    / / \  \
+ *              module formats supported by DUMB     | <  /   \_
+ *              from an open file and do an initial  |  \/ /\   /
+ *              run-through.                          \_  /  > /
+ *                                                      | \ / /
+ * by Chris Moeller.                                    |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+
+DUH *DUMBEXPORT dumb_read_any(DUMBFILE *f, int restrict_, int subsong)
+{	/* Load via the quick reader, then do the initial run-through on the result. */
+	DUH *loaded = dumb_read_any_quick(f, restrict_, subsong);
+	dumb_it_do_initial_runthrough(loaded);	/* called even when loaded is NULL */
+	return loaded;
+}
diff --git a/libraries/dumb/src/it/readasy.c b/libraries/dumb/src/it/readasy.c
new file mode 100644
index 000000000..cc77dc39a
--- /dev/null
+++ b/libraries/dumb/src/it/readasy.c
@@ -0,0 +1,339 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readasy.c - Code to read an ASYLUM Music Format    / / \  \
+ *             module from an open file.             | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Chris Moeller.                                  \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+static int it_asy_read_pattern( IT_PATTERN *pattern, DUMBFILE *f, unsigned char *buffer )
+{
+	/* Reads one ASYLUM pattern (64 rows x 8 channels x 4 bytes) from f into
+	 * pattern. buffer is caller-supplied scratch space of at least 64 * 8 * 4
+	 * bytes. Returns 0 on success, -1 on a short read or failed allocation. */
+	int pos;
+	int channel;
+	int row;
+	IT_ENTRY *entry;
+
+	pattern->n_rows = 64;
+
+	if ( dumbfile_getnc( (char *) buffer, 64 * 8 * 4, f ) != 64 * 8 * 4 )
+		return -1;
+
+	/* Upper bound for the allocation: one end-of-row marker per row plus one
+	 * entry for every cell that contains a non-zero byte. */
+	pattern->n_entries = 64;
+	for ( pos = 0; pos < 64 * 8 * 4; pos += 4 ) {
+		if ( buffer[ pos + 0 ] | buffer[ pos + 1 ] | buffer[ pos + 2 ] | buffer[ pos + 3 ] )
+			++pattern->n_entries;
+	}
+
+	pattern->entry = malloc( pattern->n_entries * sizeof( *pattern->entry ) );
+	if ( !pattern->entry )
+		return -1;
+
+	entry = pattern->entry;
+	pos = 0;
+	for ( row = 0; row < 64; ++row ) {
+		for ( channel = 0; channel < 8; ++channel, pos += 4 ) {
+			if ( !( buffer[ pos + 0 ] | buffer[ pos + 1 ] | buffer[ pos + 2 ] | buffer[ pos + 3 ] ) )
+				continue;
+
+			entry->channel = channel;
+			entry->mask = 0;
+
+			/* Byte 0: note; 0 and values of 96 or more are ignored. */
+			if ( buffer[ pos + 0 ] && buffer[ pos + 0 ] < 96 ) {
+				entry->note = buffer[ pos + 0 ];
+				entry->mask |= IT_ENTRY_NOTE;
+			}
+
+			/* Byte 1: instrument; only 1..64 is accepted. */
+			if ( buffer[ pos + 1 ] && buffer[ pos + 1 ] <= 64 ) {
+				entry->instrument = buffer[ pos + 1 ];
+				entry->mask |= IT_ENTRY_INSTRUMENT;
+			}
+
+			/* Bytes 2 and 3: effect and its parameter. */
+			_dumb_it_xm_convert_effect( buffer[ pos + 2 ], buffer[ pos + 3 ], entry, 1 );
+
+			/* Fixup: set-panning values are doubled. entry comes from malloc(), so
+			 * entry->effect is only read when IT_ENTRY_EFFECT is set in the mask
+			 * (NOTE(review): assumes the converter sets that bit - confirm). */
+			if ( ( entry->mask & IT_ENTRY_EFFECT ) && entry->effect == IT_SET_PANNING )
+				entry->effectvalue <<= 1;
+
+			if ( entry->mask ) ++entry; /* cells that yielded nothing are dropped */
+		}
+		IT_SET_END_ROW( entry );
+		++entry;
+	}
+	pattern->n_entries = (int)(entry - pattern->entry); /* entries actually written */
+	return 0;
+}
+
+
+
+static int it_asy_read_sample_header( IT_SAMPLE *sample, DUMBFILE *f )
+{
+	/* Reads one 37-byte sample header from f into sample (the sample data is
+	 * not touched). Returns 0 for an empty sample, else dumbfile_error( f ). */
+	int fine_tune, base_key;
+
+	/* 22-byte name ('#' first: probably a message); always NUL-terminated. */
+	dumbfile_getnc( (char *) sample->name, 22, f );
+	sample->name[22] = 0;
+	sample->filename[0] = 0;
+
+	/* Low nibble of the next byte, sign-extended. */
+	fine_tune = ( signed char ) ( dumbfile_getc( f ) << 4 ) >> 4;
+
+	/* Default volume, capped at 64. */
+	sample->default_volume = dumbfile_getc( f );
+	if ( sample->default_volume > 64 )
+		sample->default_volume = 64;
+	sample->global_volume = 64;
+
+	/* Signed key offset; used as the exponent for C5_speed below. */
+	base_key = ( signed char ) dumbfile_getc( f );
+
+	/* Length, loop start and loop length; the loop end is derived. */
+	sample->length = dumbfile_igetl( f );
+	sample->loop_start = dumbfile_igetl( f );
+	sample->loop_end = sample->loop_start + dumbfile_igetl( f );
+
+	/* Empty sample: flag it as non-existent and stop here. */
+	if ( sample->length <= 0 ) {
+		sample->flags = 0;
+		return 0;
+	}
+
+	sample->flags = IT_SAMPLE_EXISTS;
+	if ( ( sample->loop_end - sample->loop_start > 2 ) && ( sample->loop_end <= sample->length ) )
+		sample->flags |= IT_SAMPLE_LOOP;
+
+	sample->default_pan = 0;
+	sample->C5_speed = (int)( AMIGA_CLOCK / 214.0 * pow( DUMB_SEMITONE_BASE, base_key ) );
+	sample->finetune = fine_tune * 32; /* this scale factor might be wrong */
+
+	/* No vibrato, and -1 for max_resampling_quality. */
+	sample->vibrato_speed = 0;
+	sample->vibrato_depth = 0;
+	sample->vibrato_rate = 0;
+	sample->vibrato_waveform = 0;
+	sample->max_resampling_quality = -1;
+
+	return dumbfile_error(f);
+}
+
+
+
+static int it_asy_read_sample_data( IT_SAMPLE *sample, DUMBFILE *f )
+{
+	/* Reads sample->length bytes of data for sample from f, dropping whatever is
+	 * stored after the loop end. Returns 0 on success, non-zero on failure. */
+	int32 truncated_size = 0;
+
+	/* let's get rid of the sample data coming after the end of the loop */
+	if ( ( sample->flags & IT_SAMPLE_LOOP ) && sample->loop_end < sample->length ) {
+		truncated_size = sample->length - sample->loop_end;
+		sample->length = sample->loop_end;
+	}
+	/* Fail explicitly on a negative length rather than via a wrapped malloc size. */
+	if ( sample->length < 0 ) return -1;
+
+	if ( sample->length == 0 ) {
+		sample->flags = 0; /* nothing to play; malloc( 0 ) may legally return NULL */
+	} else {
+		sample->data = malloc( sample->length );
+		if ( !sample->data ) return -1;
+		dumbfile_getnc( sample->data, sample->length, f );
+	}
+	dumbfile_skip( f, truncated_size );
+	return dumbfile_error( f );
+}
+
+
+
+static DUMB_IT_SIGDATA *it_asy_load_sigdata(DUMBFILE *f)
+{
+	DUMB_IT_SIGDATA *sigdata;
+	int i;
+	/* Loads an ASYLUM module from f. Returns malloc'd sigdata, or NULL on any failure. */
+	static const char sig_part[] = "ASYLUM Music Format";
+	static const char sig_rest[] = " V1.0"; /* whee, string space optimization with format type below */
+
+	char signature [32];
+	/* 32 bytes are read, of which the first 24 must match the two strings above. */
+	if ( dumbfile_getnc( signature, 32, f ) != 32 ||
+		memcmp( signature, sig_part, 19 ) ||
+		memcmp( signature + 19, sig_rest, 5 ) ) {
+		return NULL;
+	}
+
+	sigdata = malloc(sizeof(*sigdata));
+	if (!sigdata) {
+		return NULL;
+	}
+
+	sigdata->speed = dumbfile_getc( f ); /* XXX seems to fit the files I have */
+	sigdata->tempo = dumbfile_getc( f ); /* XXX likewise inferred from sample files */
+	sigdata->n_samples = dumbfile_getc( f ); /* XXX likewise inferred from sample files */
+	sigdata->n_patterns = dumbfile_getc( f );
+	sigdata->n_orders = dumbfile_getc( f );
+	sigdata->restart_position = dumbfile_getc( f );
+	/* Reject read errors, more than 64 samples, and zero samples, patterns or orders. */
+	if ( dumbfile_error( f ) || !sigdata->n_samples || sigdata->n_samples > 64 || !sigdata->n_patterns ||
+		!sigdata->n_orders ) {
+		free( sigdata );
+		return NULL;
+	}
+
+	if ( sigdata->restart_position > sigdata->n_orders ) /* XXX restart_position == n_orders is let through */
+		sigdata->restart_position = 0;
+	/* Order list: 256 bytes in the file, of which only the first n_orders are kept. */
+	sigdata->order = malloc( sigdata->n_orders );
+	if ( !sigdata->order ) {
+		free( sigdata );
+		return NULL;
+	}
+
+    if ( dumbfile_getnc( (char *) sigdata->order, sigdata->n_orders, f ) != sigdata->n_orders ||
+		dumbfile_skip( f, 256 - sigdata->n_orders ) ) {
+		free( sigdata->order );
+		free( sigdata );
+		return NULL;
+	}
+
+	sigdata->sample = malloc( sigdata->n_samples * sizeof( *sigdata->sample ) );
+	if ( !sigdata->sample ) {
+		free( sigdata->order );
+		free( sigdata );
+		return NULL;
+	}
+	/* Give every pointer a value before the error paths below pass sigdata to _dumb_it_unload_sigdata(). */
+	sigdata->song_message = NULL;
+	sigdata->instrument = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->n_instruments = 0;
+
+	for ( i = 0; i < sigdata->n_samples; ++i )
+		sigdata->sample[i].data = NULL;
+
+	for ( i = 0; i < sigdata->n_samples; ++i ) {
+		if ( it_asy_read_sample_header( &sigdata->sample[i], f ) ) {
+			_dumb_it_unload_sigdata( sigdata );
+			return NULL;
+		}
+	}
+	/* Skip the header slots of the unused samples: 64 slots of 37 bytes in total. */
+	if ( dumbfile_skip( f, 37 * ( 64 - sigdata->n_samples ) ) ) {
+		_dumb_it_unload_sigdata( sigdata );
+		return NULL;
+	}
+
+	sigdata->pattern = malloc( sigdata->n_patterns * sizeof( *sigdata->pattern ) );
+	if ( !sigdata->pattern ) {
+		_dumb_it_unload_sigdata( sigdata );
+		return NULL;
+	}
+	for (i = 0; i < sigdata->n_patterns; ++i)
+		sigdata->pattern[i].entry = NULL;
+
+	/* Read in the patterns */
+	{
+		unsigned char *buffer = malloc( 64 * 8 * 4 ); /* 64 rows * 8 channels * 4 bytes */
+		if ( !buffer ) {
+			_dumb_it_unload_sigdata( sigdata );
+			return NULL;
+		}
+		for ( i = 0; i < sigdata->n_patterns; ++i ) {
+			if ( it_asy_read_pattern( &sigdata->pattern[i], f, buffer ) != 0 ) {
+				free( buffer );
+				_dumb_it_unload_sigdata( sigdata );
+				return NULL;
+			}
+		}
+		free( buffer );
+	}
+
+	/* And finally, the sample data */
+	for ( i = 0; i < sigdata->n_samples; ++i ) {
+		if ( it_asy_read_sample_data( &sigdata->sample[i], f ) ) {
+			_dumb_it_unload_sigdata( sigdata );
+			return NULL;
+		}
+	}
+
+	/* Now let's initialise the remaining variables, and we're done! */
+	sigdata->flags = IT_WAS_AN_XM | IT_WAS_A_MOD | IT_OLD_EFFECTS | IT_COMPATIBLE_GXX | IT_STEREO;
+
+	sigdata->global_volume = 128;
+	sigdata->mixing_volume = 48;
+	sigdata->pan_separation = 128;
+
+	sigdata->n_pchannels = 8;
+
+	sigdata->name[0] = 0; /* no title is read for this format */
+
+	memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS);
+	/* Default panning repeats in groups of four channels: 32 - sep, 32 + sep, 32 + sep, 32 - sep. */
+	for (i = 0; i < DUMB_IT_N_CHANNELS; i += 4) {
+		int sep = 32 * dumb_it_default_panning_separation / 100;
+		sigdata->channel_pan[i+0] = 32 - sep;
+		sigdata->channel_pan[i+1] = 32 + sep;
+		sigdata->channel_pan[i+2] = 32 + sep;
+		sigdata->channel_pan[i+3] = 32 - sep;
+	}
+
+	_dumb_it_fix_invalid_orders(sigdata);
+
+	return sigdata;
+}
+
+
+
+DUH *DUMBEXPORT dumb_read_asy_quick(DUMBFILE *f)
+{
+	/* Loads an ASYLUM module from f and passes it to make_duh() together with
+	 * a TITLE and a FORMAT tag. Returns NULL if the module fails to load. */
+	DUH_SIGTYPE_DESC *descptr = &_dumb_sigtype_it;
+	sigdata_t *sigdata = it_asy_load_sigdata(f);
+	const char *tag[2][2];
+
+	if (sigdata == NULL)
+		return NULL;
+
+	/* Tag 0: the title, pointing into the loaded signal data. */
+	tag[0][0] = "TITLE";
+	tag[0][1] = (const char *)(((DUMB_IT_SIGDATA *)sigdata)->name);
+	/* Tag 1: fixed format description. */
+	tag[1][0] = "FORMAT";
+	tag[1][1] = "ASYLUM Music Format";
+
+	return make_duh(-1, 2, (const char *const (*)[2])tag, 1, &descptr, &sigdata);
+}
diff --git a/libraries/dumb/src/it/readdsmf.c b/libraries/dumb/src/it/readdsmf.c
new file mode 100644
index 000000000..d64d87950
--- /dev/null
+++ b/libraries/dumb/src/it/readdsmf.c
@@ -0,0 +1,383 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readdsmf.c - Code to read a RIFF DSMF module       / / \  \
+ *             from a parsed RIFF structure.         | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Chris Moeller.                                  \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+#include "internal/riff.h"
+
+static int it_riff_dsmf_process_sample( IT_SAMPLE * sample, DUMBFILE * f, int len )
+{
+	/* Reads one INST chunk body (64-byte header, then the sample bytes) from f
+	 * into sample; len is the chunk size. Returns 0 on success, -1 on failure. */
+	int flags, i;
+
+	/* 13-byte file name; terminate it directly behind the bytes just read. */
+	dumbfile_getnc( (char *) sample->filename, 13, f );
+	sample->filename[ 13 ] = 0;
+
+	flags = dumbfile_igetw( f );
+	sample->default_volume = dumbfile_getc( f );
+	sample->length = dumbfile_igetl( f );
+	sample->loop_start = dumbfile_igetl( f );
+	sample->loop_end = dumbfile_igetl( f );
+	dumbfile_skip( f, 32 - 28 );
+	sample->C5_speed = dumbfile_igetw( f ) * 2;
+	dumbfile_skip( f, 36 - 34 );
+	dumbfile_getnc( (char *) sample->name, 28, f );
+	sample->name[ 28 ] = 0;
+
+	/* Empty slot. flags is assigned rather than masked: nothing has set it yet. */
+	if ( ! sample->length ) {
+		sample->flags = 0;
+		return 0;
+	}
+
+	/* Header plus data must fit in the chunk. Phrased without adding to length
+	 * so huge values cannot overflow; negative lengths are rejected as well. */
+	if ( sample->length < 0 || len < 64 || sample->length > len - 64 )
+		return -1;
+
+	sample->flags = IT_SAMPLE_EXISTS;
+	sample->default_pan = 0;
+	sample->global_volume = 64;
+	sample->vibrato_speed = 0;
+	sample->vibrato_depth = 0;
+	sample->vibrato_rate = 0;
+	sample->vibrato_waveform = IT_VIBRATO_SINE;
+	sample->finetune = 0;
+	sample->max_resampling_quality = -1;
+
+	/* Bit 0: loop (data past a valid loop end is dropped); bit 4: ping-pong. */
+	if ( flags & 1 )
+	{
+		if (((unsigned int)sample->loop_end <= (unsigned int)sample->length) &&
+			((unsigned int)sample->loop_start < (unsigned int)sample->loop_end))
+		{
+			sample->length = sample->loop_end;
+			sample->flags |= IT_SAMPLE_LOOP;
+			if ( flags & 0x10 ) sample->flags |= IT_SAMPLE_PINGPONG_LOOP;
+		}
+	}
+
+	sample->data = malloc( sample->length );
+	if ( ! sample->data )
+		return -1;
+	dumbfile_getnc( sample->data, sample->length, f );
+
+	/* Bit 1 clear: flip the top bit of every byte (presumably unsigned data). */
+	if ( ! ( flags & 2 ) )
+	{
+		for ( i = 0; i < sample->length; ++i )
+			( ( signed char * ) sample->data ) [ i ] ^= 0x80;
+	}
+
+	return 0;
+}
+
+static int it_riff_dsmf_process_pattern( IT_PATTERN * pattern, DUMBFILE * f, int len )
+{
+	/* Reads one PATT chunk body from f into pattern (always 64 rows); len is the
+	 * chunk size. Pass 1 counts the entries so the array can be sized, pass 2
+	 * rereads the same bytes and fills it. Returns 0 on success, -1 on failure. */
+	int length, row;
+	unsigned flags;
+	long start, end;
+	int p, q, r;
+	IT_ENTRY * entry;
+
+	length = dumbfile_igetw( f );
+	if ( length > len ) return -1;
+	len = length - 2; /* the stored size apparently counts its own two bytes */
+
+	pattern->n_rows = 64;
+	pattern->n_entries = 64; /* one end-of-row marker per row */
+
+	row = 0;
+	start = dumbfile_pos( f );
+	end = start + len;
+
+	/* Pass 1. A zero byte ends a row; otherwise the high nibble tells which
+	 * fields follow (0x80, 0x40, 0x20: one byte each; 0x10: two bytes). */
+	while ( (row < 64) && !dumbfile_error( f ) && (dumbfile_pos( f ) < end) ) {
+		p = dumbfile_getc( f );
+		if ( ! p ) {
+			++ row;
+			continue;
+		}
+		flags = p & 0xF0;
+		if (flags) {
+			++ pattern->n_entries;
+			if (flags & 0x80) dumbfile_skip( f, 1 );
+			if (flags & 0x40) dumbfile_skip( f, 1 );
+			if (flags & 0x20) dumbfile_skip( f, 1 );
+			if (flags & 0x10) dumbfile_skip( f, 2 );
+		}
+	}
+
+	/* Nothing but row ends: no array is allocated on this path, so the count
+	 * is reset to zero to agree with it instead of claiming 64 entries. */
+	if ( pattern->n_entries == 64 ) {
+		pattern->n_entries = 0;
+		return 0;
+	}
+
+	pattern->entry = malloc( pattern->n_entries * sizeof( * pattern->entry ) );
+	if ( ! pattern->entry ) return -1;
+
+	entry = pattern->entry;
+	row = 0;
+
+	/* Rewind for pass 2. On failure the array is released here: the caller
+	 * only counts a pattern as loaded after this function has succeeded. */
+	if ( dumbfile_seek( f, start, DFS_SEEK_SET ) ) {
+		free( pattern->entry );
+		pattern->entry = NULL;
+		return -1;
+	}
+
+	while ( ( row < 64 ) && !dumbfile_error( f ) && ( dumbfile_pos( f ) < end ) ) {
+		p = dumbfile_getc( f );
+		if ( ! p ) {
+			IT_SET_END_ROW( entry );
+			++ entry;
+			++ row;
+			continue;
+		}
+		flags = p;
+		entry->channel = flags & 0x0F;
+		entry->mask = 0;
+
+		if ( flags & 0xF0 ) {
+			if ( flags & 0x80 ) {
+				q = dumbfile_getc( f );
+				if ( q ) {
+					entry->mask |= IT_ENTRY_NOTE;
+					entry->note = q - 1;
+				}
+			}
+
+			if ( flags & 0x40 ) {
+				q = dumbfile_getc( f );
+				if ( q ) {
+					entry->mask |= IT_ENTRY_INSTRUMENT;
+					entry->instrument = q;
+				}
+			}
+
+			if ( flags & 0x20 ) {
+				entry->mask |= IT_ENTRY_VOLPAN;
+				entry->volpan = dumbfile_getc( f );
+			}
+
+			if ( flags & 0x10 ) {
+				q = dumbfile_getc( f );
+				r = dumbfile_getc( f );
+				_dumb_it_xm_convert_effect( q, r, entry, 0 );
+			}
+
+			if (entry->mask) entry++;
+		}
+	}
+
+	/* Close any rows the data did not reach. */
+	while ( row < 64 ) {
+		IT_SET_END_ROW( entry );
+		++ entry;
+		++ row;
+	}
+
+	pattern->n_entries = (int)(entry - pattern->entry);
+	if ( ! pattern->n_entries ) return -1;
+	return 0;
+}
+
+static DUMB_IT_SIGDATA *it_riff_dsmf_load_sigdata( DUMBFILE * f, struct riff * stream )
+{
+	/* Builds sigdata from a parsed RIFF DSMF stream, reading chunk bodies from f.
+	 * Requires exactly one SONG chunk and between 1 and 255 PATT and INST chunks
+	 * each. Returns the new sigdata, or NULL on any error. */
+	DUMB_IT_SIGDATA *sigdata;
+	int n, o, found;
+
+	if ( ! stream ) goto error;
+	if ( stream->type != DUMB_ID( 'D', 'S', 'M', 'F' ) ) goto error;
+
+	sigdata = malloc(sizeof(*sigdata));
+	if ( ! sigdata ) goto error;
+
+	sigdata->n_patterns = 0;
+	sigdata->n_samples = 0;
+	sigdata->name[0] = 0;
+
+	found = 0;
+
+	/* First walk: check for the SONG chunk and count patterns and samples. */
+	for ( n = 0; (unsigned)n < stream->chunk_count; ++n )
+	{
+		struct riff_chunk * c = stream->chunks + n;
+		switch( c->type )
+		{
+		case DUMB_ID( 'S' ,'O' ,'N' ,'G' ):
+			/* initialization data */
+			if ( ( found ) || ( c->size < 192 ) ) goto error_sd;
+			found = 1;
+			break;
+
+		case DUMB_ID( 'P', 'A', 'T', 'T' ):
+			++ sigdata->n_patterns;
+			break;
+
+		case DUMB_ID( 'I', 'N', 'S', 'T' ):
+			++ sigdata->n_samples;
+			break;
+		}
+	}
+
+	if ( !found || !sigdata->n_samples || !sigdata->n_patterns ) goto error_sd;
+	if ( sigdata->n_samples > 255 || sigdata->n_patterns > 255 ) goto error_sd;
+
+	sigdata->song_message = NULL;
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->sample = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->mixing_volume = 48;
+	sigdata->pan_separation = 128;
+
+	sigdata->n_instruments = 0;
+	sigdata->n_orders = 0;
+	sigdata->restart_position = 0;
+
+	memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS);
+
+	for (n = 0; n < DUMB_IT_N_CHANNELS; n += 4) {
+		int sep = 32 * dumb_it_default_panning_separation / 100;
+		sigdata->channel_pan[n  ] = 32 - sep;
+		sigdata->channel_pan[n+1] = 32 + sep;
+		sigdata->channel_pan[n+2] = 32 + sep;
+		sigdata->channel_pan[n+3] = 32 - sep;
+	}
+
+	/* Second walk: read the song header from the SONG chunk. */
+	for ( n = 0; (unsigned)n < stream->chunk_count; ++n )
+	{
+		struct riff_chunk * c = stream->chunks + n;
+		switch ( c->type )
+		{
+		case DUMB_ID( 'S', 'O', 'N', 'G' ):
+			if ( dumbfile_seek( f, c->offset, DFS_SEEK_SET ) ) goto error_usd;
+			dumbfile_getnc( (char *) sigdata->name, 28, f );
+			sigdata->name[ 28 ] = 0;
+			sigdata->flags = IT_STEREO | IT_OLD_EFFECTS | IT_COMPATIBLE_GXX;
+			dumbfile_skip( f, 36 - 28 );
+			sigdata->n_orders = dumbfile_igetw( f );
+			/* Offsets 38..41 hold sample and pattern counts; the chunk counts are used instead. */
+			dumbfile_skip( f, 42 - 38 );
+			sigdata->n_pchannels = dumbfile_igetw( f );
+			sigdata->global_volume = dumbfile_getc( f );
+			sigdata->mixing_volume = dumbfile_getc( f );
+			sigdata->speed = dumbfile_getc( f );
+			sigdata->tempo = dumbfile_getc( f );
+			for ( o = 0; o < 16; ++o )
+				sigdata->channel_pan[ o ] = dumbfile_getc( f ) / 2;
+			/* The order table is always 128 bytes; a short read would leave part of it unset. */
+			sigdata->order = malloc( 128 );
+			if ( ! sigdata->order ) goto error_usd;
+			if ( dumbfile_getnc( (char *) sigdata->order, 128, f ) < 128 ) goto error_usd;
+			/* n_orders is a 16-bit field in the file; never let it exceed the table. */
+			if ( sigdata->n_orders > 128 ) sigdata->n_orders = 128;
+			break;
+		}
+	}
+
+	sigdata->pattern = malloc( sigdata->n_patterns * sizeof( *sigdata->pattern ) );
+	if ( ! sigdata->pattern ) goto error_usd;
+	for ( n = 0; n < sigdata->n_patterns; ++n )
+		sigdata->pattern[ n ].entry = NULL;
+
+	sigdata->sample = malloc( sigdata->n_samples * sizeof( *sigdata->sample ) );
+	if ( ! sigdata->sample ) goto error_usd;
+	for ( n = 0; n < sigdata->n_samples; ++n )
+	{
+		IT_SAMPLE * sample = sigdata->sample + n;
+		sample->data = NULL;
+	}
+
+	sigdata->n_samples = 0;
+	sigdata->n_patterns = 0;
+
+	/* Third walk: load patterns and samples, recounting them as each one succeeds. */
+	for ( n = 0; (unsigned)n < stream->chunk_count; ++n )
+	{
+		struct riff_chunk * c = stream->chunks + n;
+		switch ( c->type )
+		{
+		case DUMB_ID( 'P', 'A', 'T', 'T' ):
+			if ( dumbfile_seek( f, c->offset, DFS_SEEK_SET ) ) goto error_usd;
+			if ( it_riff_dsmf_process_pattern( sigdata->pattern + sigdata->n_patterns, f, c->size ) ) goto error_usd;
+			++ sigdata->n_patterns;
+			break;
+
+		case DUMB_ID( 'I', 'N', 'S', 'T' ):
+			if ( dumbfile_seek( f, c->offset, DFS_SEEK_SET ) ) goto error_usd;
+			if ( it_riff_dsmf_process_sample( sigdata->sample + sigdata->n_samples, f, c->size ) ) goto error_usd;
+			++ sigdata->n_samples;
+			break;
+		}
+	}
+
+	_dumb_it_fix_invalid_orders( sigdata );
+
+	return sigdata;
+
+error_usd:
+	_dumb_it_unload_sigdata( sigdata );
+	goto error;
+error_sd:
+	free( sigdata );
+error:
+	return NULL;
+}
+
+DUH *dumb_read_riff_dsmf( DUMBFILE * f, struct riff * stream )
+{
+	/* Loads the RIFF DSMF module described by stream, reading from f, and passes
+	 * it to make_duh() with a TITLE and a FORMAT tag. NULL if loading fails. */
+	DUH_SIGTYPE_DESC *descptr = &_dumb_sigtype_it;
+	sigdata_t *sigdata = it_riff_dsmf_load_sigdata( f, stream );
+	const char *tag[2][2];
+
+	if ( sigdata == NULL )
+		return NULL;
+
+	/* Tag 0: the title, pointing into the loaded signal data. */
+	tag[0][0] = "TITLE";
+	tag[0][1] = (const char *)(((DUMB_IT_SIGDATA *)sigdata)->name);
+	/* Tag 1: fixed format description. */
+	tag[1][0] = "FORMAT";
+	tag[1][1] = "RIFF DSMF";
+
+	return make_duh( -1, 2, ( const char * const (*) [ 2 ] ) tag, 1, & descptr, & sigdata );
+}
diff --git a/libraries/dumb/src/it/readmod.c b/libraries/dumb/src/it/readmod.c
new file mode 100644
index 000000000..f73802798
--- /dev/null
+++ b/libraries/dumb/src/it/readmod.c
@@ -0,0 +1,633 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readmod.c - Code to read a good old-fashioned      / / \  \
+ *             Amiga module from an open file.       | <  /   \_
+ *                                                   |  \/ /\   /
+ * By entheh.                                         \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+static int it_mod_read_pattern(IT_PATTERN *pattern, DUMBFILE *f, int n_channels, unsigned char *buffer)
+{
+	/* Reads one 64-row MOD pattern from f into pattern, using buffer as scratch
+	 * space for the raw cells (4 bytes per channel per row). n_channels == 0 is
+	 * a special case: two consecutive 4-channel patterns in the file are merged
+	 * into one 8-channel pattern. Returns 0 on success, -1 on a read error or
+	 * failed allocation. */
+	IT_ENTRY *out;
+	int r, ch, off;
+
+	pattern->n_rows = 64;
+
+	if (n_channels != 0) {
+		/* Plain layout: all rows and channels in a single read. */
+		dumbfile_getnc((char *)buffer, 64 * n_channels * 4, f);
+	} else {
+		/* The first pattern in the file fills channels 0-3 of every row... */
+		for (off = 0; off < 64*8*4; off += 8*4)
+			dumbfile_getnc((char *)buffer + off, 4*4, f);
+		/* ...and the second one fills the gaps, channels 4-7. */
+		for (off = 4*4; off < 64*8*4; off += 8*4)
+			dumbfile_getnc((char *)buffer + off, 4*4, f);
+
+		n_channels = 8;
+	}
+
+	if (dumbfile_error(f))
+		return -1;
+
+	/* One end-of-row marker per row, plus one entry per non-empty cell. */
+	pattern->n_entries = 64;
+	for (off = 0; off < 64 * n_channels * 4; off += 4) {
+		if (buffer[off+0] | buffer[off+1] | buffer[off+2] | buffer[off+3])
+			pattern->n_entries++;
+	}
+
+	pattern->entry = malloc(pattern->n_entries * sizeof(*pattern->entry));
+	if (!pattern->entry)
+		return -1;
+
+	/* Second sweep: turn every non-empty cell into an entry. */
+	out = pattern->entry;
+	off = 0;
+	for (r = 0; r < 64; r++) {
+		for (ch = 0; ch < n_channels; ch++, off += 4) {
+			const unsigned char *cell = buffer + off;
+			unsigned char sample;
+			int period;
+
+			if (!(cell[0] | cell[1] | cell[2] | cell[3]))
+				continue;
+
+			/* Sample number: high nibbles of bytes 0 and 2.
+			 * Period: low nibble of byte 0 followed by byte 1. */
+			sample = (cell[0] & 0xF0) | (cell[2] >> 4);
+			period = ((int)(cell[0] & 0x0F) << 8) | cell[1];
+
+			out->channel = ch;
+			out->mask = 0;
+
+			if (period) {
+				/* Period 214 translates to note 60 (C5, matching the C5_speed
+				 * of 16726), and halving the period goes up an octave:
+				 *   period = 214 / pow(DUMB_SEMITONE_BASE, note - 60)
+				 *   note - 60 = log(214/period) / log(DUMB_SEMITONE_BASE)
+				 * The 0.5 added before floor() rounds to the nearest note.
+				 * Finetune is not taken into account here. */
+				int note = (int)floor(log(214.0/period) / log(DUMB_SEMITONE_BASE) + 60.5);
+				out->mask |= IT_ENTRY_NOTE;
+				out->note = MID(0, note, 119);
+			}
+
+			if (sample) {
+				out->mask |= IT_ENTRY_INSTRUMENT;
+				out->instrument = sample;
+			}
+
+			/* Effect: low nibble of byte 2; parameter: byte 3. */
+			_dumb_it_xm_convert_effect(cell[2] & 0x0F, cell[3], out, 1);
+
+			out++;
+		}
+		IT_SET_END_ROW(out);
+		out++;
+	}
+
+	/* Every counted cell produced an entry, so n_entries is left as counted. */
+	return 0;
+}
+
+
+
+static int it_mod_read_sample_header(IT_SAMPLE *sample, DUMBFILE *f, int stk)
+{
+	/* Reads one 30-byte MOD sample header from f into sample. With stk non-zero
+	 * the loop start is used as read instead of being doubled. Returns 0 for an
+	 * empty sample, otherwise dumbfile_error(f). */
+	int tune, rep_start, rep_len;
+
+	/* 22-byte name; per the format notes a leading '#' probably marks a
+	 * message rather than an instrument name. Always NUL-terminated here. */
+	dumbfile_getnc((char *)sample->name, 22, f);
+	sample->name[22] = 0;
+	sample->filename[0] = 0;
+
+	/* The stored 16-bit value is doubled to give the length. */
+	sample->length = dumbfile_mgetw(f) << 1;
+
+	/* Low nibble of the next byte, sign-extended. Per the format notes each
+	 * finetune step changes the note by 1/8th of a semitone. */
+	tune = (signed char)(dumbfile_getc(f) << 4) >> 4;
+
+	/* The volume byte is stored as read; nothing is clamped here. */
+	sample->default_volume = dumbfile_getc(f);
+	sample->global_volume = 64;
+
+	rep_start = dumbfile_mgetw(f);
+	if (!stk)
+		rep_start <<= 1;
+	rep_len = dumbfile_mgetw(f) << 1;
+
+	/* If the loop overruns the sample but would fit with the start offset
+	 * halved, halve the start offset. */
+	if (rep_len > 2 && rep_start + rep_len > sample->length &&
+		rep_start / 2 + rep_len <= sample->length)
+		rep_start /= 2;
+
+	sample->loop_start = rep_start;
+	sample->loop_end = rep_start + rep_len;
+
+	/* Empty sample: flag it as non-existent and leave the other fields alone. */
+	if (sample->length <= 0) {
+		sample->flags = 0;
+		return 0;
+	}
+
+	sample->flags = IT_SAMPLE_EXISTS;
+	sample->default_pan = 0;
+	sample->C5_speed = (int)( AMIGA_CLOCK / 214.0 );
+	sample->finetune = tune * 32; /* this scale factor might be wrong */
+
+	/* Clip the loop to the sample; it only counts as a loop when it is more
+	 * than two bytes long. */
+	if (sample->loop_end > sample->length)
+		sample->loop_end = sample->length;
+	if (sample->loop_end - sample->loop_start > 2)
+		sample->flags |= IT_SAMPLE_LOOP;
+
+	/* No vibrato, and -1 for max_resampling_quality. */
+	sample->vibrato_speed = 0;
+	sample->vibrato_depth = 0;
+	sample->vibrato_rate = 0;
+	sample->vibrato_waveform = 0;
+	sample->max_resampling_quality = -1;
+
+	return dumbfile_error(f);
+}
+
+
+
+static int it_mod_read_sample_data(IT_SAMPLE *sample, DUMBFILE *f, uint32 fft)
+{
+	int32 i;
+	int32 truncated_size;
+	/* Reads the data for sample from f; fft is the module's 4-byte format tag. Returns 0, or -1 on failure. */
+	/* let's get rid of the sample data coming after the end of the loop */
+	if ((sample->flags & IT_SAMPLE_LOOP) && sample->loop_end < sample->length) {
+		truncated_size = sample->length - sample->loop_end;
+		sample->length = sample->loop_end;
+	} else {
+		truncated_size = 0;
+	}
+
+	if (sample->length) {
+		sample->data = malloc(sample->length);
+
+		if (!sample->data)
+			return -1;
+
+		/* Sample data are stored in "8-bit two's compliment format" (sic). */
+		/*
+		 * The first five bytes are read on their own so that a leading
+		 * "ADPCM" marker can be recognised before the rest is read.
+		 */
+		/* "ADPCM"-prefixed data goes to _dumb_it_read_sample_data_adpcm4() (variant attributed to Olivier Lapicque). */
+		if (sample->length >= 5)
+		{
+			i = dumbfile_getnc(sample->data, 5, f);
+			if (i == 5)
+			{
+				if (!memcmp(sample->data, "ADPCM", 5))
+				{
+					if (_dumb_it_read_sample_data_adpcm4(sample, f) < 0)
+						return -1;
+
+					return 0;
+				}
+				else
+				{
+					i += dumbfile_getnc(((char *)sample->data) + 5, sample->length - 5, f);
+				}
+			}
+		}
+		else
+		{
+			i = dumbfile_getnc(sample->data, sample->length, f);
+		}
+		if (i < sample->length)
+		{
+			if (i <= 0)
+			{
+				sample->flags = 0;
+				return 0;
+			}
+			sample->length = i;
+			if (sample->loop_end > i) sample->loop_end = i;
+			// a loop that starts beyond the data actually read cannot be kept
+			if (sample->loop_start > i) sample->flags &= ~IT_SAMPLE_LOOP;
+		}
+		else
+		{
+			/* skip truncated data */
+			int feh = dumbfile_error(f); /* error state sampled before the skip below */
+
+			if (truncated_size) dumbfile_skip(f, truncated_size);
+			// Should we be truncating it?
+
+			if (feh)
+				return -1;
+		}
+		/* For the 'M',0,0,0 and '8',0,0,0 tags the bytes are deltas: each is replaced by the running sum. */
+		if (fft == DUMB_ID('M',0,0,0) || fft == DUMB_ID('8',0,0,0)) {
+			int delta = 0;
+			for (i = 0; i < sample->length; i++) {
+				delta += ((signed char *)sample->data)[i];
+				((signed char *)sample->data)[i] = delta;
+			}
+		}
+	}
+
+	return 0;
+}
+
+
+
+
+#define MOD_FFT_OFFSET (20 + 31*(22+2+1+1+2+2) + 1 + 1 + 128) /* file offset of the 4-byte format tag: 20-byte title, 31 sample headers of 30 bytes, then 1 + 1 + 128 bytes (presumably order count, restart byte and order table - confirm) */
+
+static DUMB_IT_SIGDATA *it_mod_load_sigdata(DUMBFILE *f, int restrict_)
+{
+	DUMB_IT_SIGDATA *sigdata;
+	int n_channels;
+	int i;
+	uint32 fft;
+
+    if ( dumbfile_seek(f, MOD_FFT_OFFSET, DFS_SEEK_SET) )
+        return NULL;
+
+    fft = dumbfile_mgetl(f);
+    if (dumbfile_error(f))
+        return NULL;
+
+    if ( dumbfile_seek(f, 0, DFS_SEEK_SET) )
+        return NULL;
+
+	sigdata = malloc(sizeof(*sigdata));
+	if (!sigdata) {
+		return NULL;
+	}
+
+	/**
+      1       20   Chars     Title of the song.  If the title is not a
+                             full 20 chars in length, it will be null-
+                             terminated.
+	*/
+    if (dumbfile_getnc((char *)sigdata->name, 20, f) < 20) {
+		free(sigdata);
+        return NULL;
+	}
+	sigdata->name[20] = 0;
+
+	sigdata->n_samples = 31;
+
+	switch (fft) {
+		case DUMB_ID('M','.','K','.'):
+		case DUMB_ID('M','!','K','!'):
+		case DUMB_ID('M','&','K','!'):
+		case DUMB_ID('N','.','T','.'):
+		case DUMB_ID('N','S','M','S'):
+		case DUMB_ID('F','L','T','4'):
+		case DUMB_ID('M',0,0,0):
+		case DUMB_ID('8',0,0,0):
+			n_channels = 4;
+			break;
+		case DUMB_ID('F','L','T','8'):
+			n_channels = 0;
+			/* 0 indicates a special case; two four-channel patterns must be
+			 * combined into one eight-channel pattern. Pattern indexes must
+			 * be halved. Why oh why do they obfuscate so?
+			 */
+			/*for (i = 0; i < 128; i++)
+				sigdata->order[i] >>= 1;*/
+			break;
+		case DUMB_ID('C','D','8','1'):
+		case DUMB_ID('O','C','T','A'):
+		case DUMB_ID('O','K','T','A'):
+			n_channels = 8;
+			break;
+		case DUMB_ID('1','6','C','N'):
+			n_channels = 16;
+			break;
+		case DUMB_ID('3','2','C','N'):
+			n_channels = 32;
+			break;
+		default:
+			/* If we get an illegal tag, assume 4 channels 15 samples. */
+			if ((fft & 0x0000FFFFL) == DUMB_ID(0,0,'C','H')) {
+				if (fft >= '1' << 24 && fft < '4' << 24) {
+					n_channels = ((fft & 0x00FF0000L) >> 16) - '0';
+					if ((unsigned int)n_channels >= 10) {
+						/* Rightmost character wasn't a digit. */
+						n_channels = 4;
+						sigdata->n_samples = 15;
+					} else {
+						n_channels += (((fft & 0xFF000000L) >> 24) - '0') * 10;
+						/* MODs should really only go up to 32 channels, but we're lenient. */
+						if ((unsigned int)(n_channels - 1) >= DUMB_IT_N_CHANNELS - 1) {
+							/* No channels or too many? Can't be right... */
+							n_channels = 4;
+							sigdata->n_samples = 15;
+						}
+					}
+				} else {
+					n_channels = 4;
+					sigdata->n_samples = 15;
+				}
+			} else if ((fft & 0x00FFFFFFL) == DUMB_ID(0,'C','H','N')) {
+				n_channels = (fft >> 24) - '0';
+				if ((unsigned int)(n_channels - 1) >= 9) {
+					/* Character was '0' or it wasn't a digit */
+					n_channels = 4;
+					sigdata->n_samples = 15;
+				}
+			} else if ((fft & 0xFFFFFF00L) == DUMB_ID('T','D','Z',0)) {
+				n_channels = (fft & 0x000000FFL) - '0';
+				if ((unsigned int)(n_channels - 1) >= 9) {
+					/* We've been very lenient, given that it should have
+					 * been 1, 2 or 3, but this MOD has been very naughty and
+					 * must be punished.
+					 */
+					n_channels = 4;
+					sigdata->n_samples = 15;
+				}
+			} else {
+				n_channels = 4;
+				sigdata->n_samples = 15;
+			}
+	}
+
+	// moo
+	if ( ( restrict_ & 1 ) && sigdata->n_samples == 15 )
+	{
+		free(sigdata);
+        return NULL;
+	}
+
+	sigdata->n_pchannels = n_channels ? n_channels : 8; /* special case for 0, see above */
+
+	sigdata->sample = malloc(sigdata->n_samples * sizeof(*sigdata->sample));
+	if (!sigdata->sample) {
+		free(sigdata);
+        return NULL;
+	}
+
+	sigdata->song_message = NULL;
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->n_instruments = 0;
+
+	for (i = 0; i < sigdata->n_samples; i++)
+		sigdata->sample[i].data = NULL;
+
+	for (i = 0; i < sigdata->n_samples; i++) {
+		if (it_mod_read_sample_header(&sigdata->sample[i], f, sigdata->n_samples == 15)) {
+			_dumb_it_unload_sigdata(sigdata);
+            return NULL;
+		}
+	}
+
+	sigdata->n_orders = dumbfile_getc(f);
+	sigdata->restart_position = dumbfile_getc(f);
+	// what if this is >= 127? what about with Fast Tracker II?
+
+/*	if (sigdata->n_orders <= 0 || sigdata->n_orders > 128) { // is this right?
+		_dumb_it_unload_sigdata(sigdata);
+        return NULL;
+	}*/
+
+	//if (sigdata->restart_position >= sigdata->n_orders)
+		//sigdata->restart_position = 0;
+
+	sigdata->order = malloc(128); /* We may need to scan the extra ones! */
+	if (!sigdata->order) {
+		_dumb_it_unload_sigdata(sigdata);
+        return NULL;
+	}
+    if (dumbfile_getnc((char *)sigdata->order, 128, f) < 128) {
+		_dumb_it_unload_sigdata(sigdata);
+        return NULL;
+	}
+
+	if (sigdata->n_orders <= 0 || sigdata->n_orders > 128) { // is this right?
+		sigdata->n_orders = 128;
+		//while (sigdata->n_orders > 1 && !sigdata->order[sigdata->n_orders - 1]) sigdata->n_orders--;
+	}
+
+	if ( ! n_channels )
+		for (i = 0; i < 128; i++)
+			sigdata->order[i] >>= 1;
+
+	/* "The old NST format contains only 15 samples (instead of 31). Further
+	 * it doesn't contain a file format tag (id). So Pattern data offset is
+	 * at 20+15*30+1+1+128."
+	 * - Then I shall assume the File Format Tag never exists if there are
+	 * only 15 samples. I hope this isn't a faulty assumption...
+	 */
+	if (sigdata->n_samples == 31)
+		dumbfile_skip(f, 4);
+
+	sigdata->n_patterns = -1;
+
+    if ( ( restrict_ & 2 ) )
+	{
+        unsigned char buffer[5];
+        long sample_number;
+        long total_sample_size;
+        long offset = dumbfile_pos(f);
+        long remain = dumbfile_get_size(f) - offset;
+        if ( dumbfile_error( f ) ||
+             dumbfile_seek( f, 0, SEEK_END ) ) {
+            _dumb_it_unload_sigdata(sigdata);
+            return NULL;
+        }
+        sample_number = sigdata->n_samples - 1;
+        total_sample_size = 0;
+        while (dumbfile_pos(f) > offset && sample_number >= 0) {
+            if (sigdata->sample[sample_number].flags & IT_SAMPLE_EXISTS) {
+                if ( dumbfile_seek(f, -((sigdata->sample[sample_number].length + 1) / 2 + 5 + 16), DFS_SEEK_CUR) ||
+                     dumbfile_getnc((char *)buffer, 5, f) < 5 ) {
+                    _dumb_it_unload_sigdata(sigdata);
+                    return NULL;
+                }
+                if ( !memcmp( buffer, "ADPCM", 5 ) ) { /* BAH */
+                    total_sample_size += (sigdata->sample[sample_number].length + 1) / 2 + 5 + 16;
+                    if ( dumbfile_seek(f, -5, DFS_SEEK_CUR) ) {
+                        _dumb_it_unload_sigdata(sigdata);
+                        return NULL;
+                    }
+                } else {
+                    total_sample_size += sigdata->sample[sample_number].length;
+                    if ( dumbfile_seek(f, -(sigdata->sample[sample_number].length - ((sigdata->sample[sample_number].length + 1) / 2 + 5 + 16) + 5), DFS_SEEK_CUR) ) {
+                        _dumb_it_unload_sigdata(sigdata);
+                        return NULL;
+                    }
+                }
+            }
+            --sample_number;
+        }
+
+		if (remain > total_sample_size) {
+			sigdata->n_patterns = ( remain - total_sample_size + 4 ) / ( 256 * sigdata->n_pchannels );
+			if (fft == DUMB_ID('M',0,0,0) || fft == DUMB_ID('8',0,0,0)) {
+				remain -= sigdata->n_patterns * 256 * sigdata->n_pchannels;
+				if (dumbfile_skip(f, remain - total_sample_size)) {
+					_dumb_it_unload_sigdata(sigdata);
+					return NULL;
+				}
+			}
+		}
+	}
+	else
+    {
+        for (i = 0; i < 128; i++)
+        {
+            if (sigdata->order[i] > sigdata->n_patterns)
+                sigdata->n_patterns = sigdata->order[i];
+        }
+		sigdata->n_patterns++;
+	}
+
+	if ( sigdata->n_patterns <= 0 ) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	/* May as well try to save a tiny bit of memory. */
+	if (sigdata->n_orders < 128) {
+		unsigned char *order = realloc(sigdata->order, sigdata->n_orders);
+		if (order) sigdata->order = order;
+	}
+
+	sigdata->pattern = malloc(sigdata->n_patterns * sizeof(*sigdata->pattern));
+	if (!sigdata->pattern) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+	for (i = 0; i < sigdata->n_patterns; i++)
+		sigdata->pattern[i].entry = NULL;
+
+	/* Read in the patterns */
+	{
+		unsigned char *buffer = malloc(256 * sigdata->n_pchannels); /* 64 rows * 4 bytes */
+		if (!buffer) {
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+		for (i = 0; i < sigdata->n_patterns; i++) {
+			if (it_mod_read_pattern(&sigdata->pattern[i], f, n_channels, buffer) != 0) {
+				free(buffer);
+				_dumb_it_unload_sigdata(sigdata);
+				return NULL;
+			}
+		}
+		free(buffer);
+	}
+
+	/* And finally, the sample data */
+	for (i = 0; i < sigdata->n_samples; i++) {
+		if (it_mod_read_sample_data(&sigdata->sample[i], f, fft)) {
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+	}
+
+	/* w00t! */
+	/*if ( n_channels == 4 &&
+		( sigdata->n_samples == 15 ||
+		( ( fft & 240 ) != DUMB_ID( 0, 0, 'C', 0 ) &&
+		( fft & 240 ) != DUMB_ID( 0, 0, 'H', 0 ) &&
+		( fft & 240 ) != 0 ) ) ) {
+		for ( i = 0; i < sigdata->n_samples; ++i ) {
+			IT_SAMPLE * sample = &sigdata->sample [i];
+			if ( sample && ( sample->flags & IT_SAMPLE_EXISTS ) ) {
+				int n, o;
+				o = sample->length;
+				if ( o > 4 ) o = 4;
+				for ( n = 0; n < o; ++n )
+					( ( char * ) sample->data ) [n] = 0;
+			}
+		}
+	}*/
+
+	/* Now let's initialise the remaining variables, and we're done! */
+	sigdata->flags = IT_WAS_AN_XM | IT_WAS_A_MOD | IT_OLD_EFFECTS | IT_COMPATIBLE_GXX | IT_STEREO;
+
+	sigdata->global_volume = 128;
+	sigdata->mixing_volume = 48;
+	/* We want 50 ticks per second; 50/6 row advances per second;
+	 * 50*10=500 row advances per minute; 500/4=125 beats per minute.
+	 */
+	sigdata->speed = 6;
+	sigdata->tempo = 125;
+	sigdata->pan_separation = 128;
+
+	memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS);
+
+	for (i = 0; i < DUMB_IT_N_CHANNELS; i += 4) {
+		int sep = 32 * dumb_it_default_panning_separation / 100;
+		sigdata->channel_pan[i+0] = 32 - sep;
+		sigdata->channel_pan[i+1] = 32 + sep;
+		sigdata->channel_pan[i+2] = 32 + sep;
+		sigdata->channel_pan[i+3] = 32 - sep;
+	}
+
+	_dumb_it_fix_invalid_orders(sigdata);
+
+	return sigdata;
+}
+
+
+
+/* Load a MOD file from f and wrap it in a DUH without doing the initial
+ * run-through.
+ *
+ * restrict_ is handed unchanged to it_mod_load_sigdata(). The resulting DUH
+ * carries two tags: "TITLE" (the song name stored in the sigdata) and
+ * "FORMAT" ("MOD"). Returns NULL if the module could not be loaded;
+ * otherwise returns whatever make_duh() returns.
+ */
+DUH *DUMBEXPORT dumb_read_mod_quick(DUMBFILE *f, int restrict_)
+{
+	DUH_SIGTYPE_DESC *descptr = &_dumb_sigtype_it;
+	sigdata_t *sigdata = it_mod_load_sigdata(f, restrict_);
+	const char *tag[2][2];
+
+	if (sigdata == NULL)
+		return NULL;
+
+	tag[0][0] = "TITLE";
+	tag[0][1] = (const char *)(((DUMB_IT_SIGDATA *)sigdata)->name);
+	tag[1][0] = "FORMAT";
+	tag[1][1] = "MOD";
+
+	return make_duh(-1, 2, (const char *const (*)[2])tag, 1, &descptr, &sigdata);
+}
diff --git a/libraries/dumb/src/it/readmod2.c b/libraries/dumb/src/it/readmod2.c
new file mode 100644
index 000000000..e1e7a9ce0
--- /dev/null
+++ b/libraries/dumb/src/it/readmod2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readmod2.c - Function to read a good old-          / / \  \
+ *              fashioned Amiga module from an       | <  /   \_
+ *              open file and do an initial          |  \/ /\   /
+ *              run-through.                          \_  /  > /
+ *                                                      | \ / /
+ * Split off from readmod.c by entheh.                  |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+
+/* Load a MOD file from f via dumb_read_mod_quick() and then perform the
+ * initial run-through on the result.
+ *
+ * The run-through is invoked even when loading failed, i.e. with a NULL
+ * DUH - presumably dumb_it_do_initial_runthrough() tolerates that; confirm
+ * against its definition. Returns the DUH, or NULL if loading failed.
+ */
+DUH *DUMBEXPORT dumb_read_mod(DUMBFILE *f, int restrict_)
+{
+	DUH *const module = dumb_read_mod_quick(f, restrict_);
+
+	dumb_it_do_initial_runthrough(module);
+
+	return module;
+}
diff --git a/libraries/dumb/src/it/readmtm.c b/libraries/dumb/src/it/readmtm.c
new file mode 100644
index 000000000..77c4f9c76
--- /dev/null
+++ b/libraries/dumb/src/it/readmtm.c
@@ -0,0 +1,413 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readmtm.c - Code to read a MultiTracker Module     / / \  \
+ *             from an open file.                    | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Chris Moeller.                                  \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+/* Length of the string at ptr, examining at most max bytes.
+ *
+ * Returns the number of bytes before the first NUL, or max if none of the
+ * first max bytes is NUL. A null ptr yields 0.
+ *
+ * The bound is tested before each byte is read, so ptr[max] is never
+ * touched and ptr only needs to address max readable bytes.
+ */
+static size_t strlen_max(const char * ptr, size_t max)
+{
+	size_t len = 0;
+	if (!ptr) return 0;
+	while (len < max && ptr[len]) len++;
+	return len;
+}
+
+/* Build one IT pattern out of up to 32 MTM tracks.
+ *
+ * pattern  - destination; n_rows, n_entries and entry are filled in.
+ * track    - all track data, 192 bytes per track (three bytes per row).
+ * sequence - 32 track numbers, one per channel. 0 means the channel is
+ *            empty; any other value is a 1-based index into track.
+ * n_rows   - number of rows to take from each track.
+ *
+ * Each three-byte cell holds the note in the top six bits of byte 0, the
+ * sample number in the low two bits of byte 0 plus the high nibble of
+ * byte 1, the effect in the low nibble of byte 1 and its parameter in
+ * byte 2.
+ *
+ * Returns 0 on success or -1 if the entry array cannot be allocated.
+ */
+static int it_mtm_assemble_pattern(IT_PATTERN *pattern, const unsigned char * track, const unsigned short * sequence, int n_rows)
+{
+	int row, chan;
+	const unsigned char * cell;
+	IT_ENTRY * out;
+
+	pattern->n_rows = n_rows;
+
+	/* Upper bound on the entry count: one end-of-row marker per row plus
+	 * one entry for every non-empty cell.
+	 */
+	pattern->n_entries = n_rows;
+	for (chan = 0; chan < 32; chan++) {
+		if (!sequence[chan]) continue;
+		cell = &track[192 * (sequence[chan] - 1)];
+		for (row = 0; row < n_rows; row++, cell += 3) {
+			if (cell[0] || cell[1] || cell[2]) pattern->n_entries++;
+		}
+	}
+
+	out = malloc(pattern->n_entries * sizeof(*out));
+	if (!out) return -1;
+	pattern->entry = out;
+
+	for (row = 0; row < n_rows; row++) {
+		for (chan = 0; chan < 32; chan++) {
+			int note, sample;
+
+			if (!sequence[chan]) continue;
+
+			cell = &track[192 * (sequence[chan] - 1) + (row * 3)];
+			if (!(cell[0] || cell[1] || cell[2])) continue;
+
+			out->channel = chan;
+			out->mask = 0;
+
+			note = cell[0] >> 2;
+			if (note) {
+				out->mask |= IT_ENTRY_NOTE;
+				out->note = note + 24;
+			}
+
+			sample = ((cell[0] << 4) | (cell[1] >> 4)) & 0x3F;
+			if (sample) {
+				out->mask |= IT_ENTRY_INSTRUMENT;
+				out->instrument = sample;
+			}
+
+			_dumb_it_xm_convert_effect(cell[1] & 0xF, cell[2], out, 1);
+
+			/* Keep the slot only if something ended up in it; otherwise
+			 * the next cell reuses it.
+			 */
+			if (out->mask) out++;
+		}
+		IT_SET_END_ROW(out);
+		out++;
+	}
+
+	/* Replace the upper bound with the number of entries actually used. */
+	pattern->n_entries = (int)(out - pattern->entry);
+
+	return 0;
+}
+
+/* Read one MTM sample header from f into sample.
+ *
+ * The fields are consumed in file order: 22-byte name, then length, loop
+ * start and loop end (each via dumbfile_igetl), a finetune byte of which
+ * only the low nibble is used (sign-extended), the default volume and an
+ * attribute byte whose bit 0 marks 16-bit data.
+ *
+ * A sample whose length is not positive gets flags == 0 and the function
+ * returns 0 immediately, without consulting dumbfile_error(). Otherwise
+ * the return value is dumbfile_error(f).
+ */
+static int it_mtm_read_sample_header(IT_SAMPLE *sample, DUMBFILE *f)
+{
+	int tune_nibble, attributes;
+
+	dumbfile_getnc((char *)sample->name, 22, f);
+	sample->name[22] = 0;
+	sample->filename[0] = 0;
+
+	sample->length = dumbfile_igetl(f);
+	sample->loop_start = dumbfile_igetl(f);
+	sample->loop_end = dumbfile_igetl(f);
+	tune_nibble = (signed char)(dumbfile_getc(f) << 4) >> 4; /* signed nibble */
+	sample->default_volume = dumbfile_getc(f);
+	attributes = dumbfile_getc(f);
+
+	sample->global_volume = 64;
+
+	if (sample->length <= 0) {
+		sample->flags = 0;
+		return 0;
+	}
+
+	sample->flags = IT_SAMPLE_EXISTS;
+
+	/* 16-bit data: halve the three position fields. */
+	if (attributes & 1) {
+		sample->flags |= IT_SAMPLE_16BIT;
+		sample->length >>= 1;
+		sample->loop_start >>= 1;
+		sample->loop_end >>= 1;
+	}
+
+	sample->default_pan = 0;
+	sample->C5_speed = (int)( AMIGA_CLOCK / 214.0 );
+	/* The original author was unsure whether this scaling is right. */
+	sample->finetune = tune_nibble * 32;
+
+	/* Clamp the loop end to the sample, then enable looping only when the
+	 * loop spans more than two positions.
+	 */
+	if (sample->loop_end > sample->length)
+		sample->loop_end = sample->length;
+	if (sample->loop_end - sample->loop_start > 2)
+		sample->flags |= IT_SAMPLE_LOOP;
+
+	sample->vibrato_speed = 0;
+	sample->vibrato_depth = 0;
+	sample->vibrato_rate = 0;
+	sample->vibrato_waveform = 0;
+	sample->max_resampling_quality = -1;
+
+	return dumbfile_error(f);
+}
+
+/* Read the data for one MTM sample from f.
+ *
+ * If the sample loops and the loop ends before the end of the sample, only
+ * the bytes up to the loop end are kept; the remainder is skipped in the
+ * file and sample->length is shortened accordingly. Every byte read has
+ * its top bit flipped (^ 0x80).
+ *
+ * Returns 0 on success, -1 on a negative length, allocation failure or
+ * read error. On success sample->data always points at an allocation, even
+ * for a zero-length sample.
+ *
+ * NOTE(review): it_mtm_read_sample_header() halves length for samples
+ * flagged 16-bit, but this function still reads `length` bytes and
+ * treats them as 8-bit - confirm whether 16-bit MTM samples are meant to
+ * be supported.
+ */
+static int it_mtm_read_sample_data(IT_SAMPLE *sample, DUMBFILE *f)
+{
+	int32 i;
+	int32 truncated_size;
+
+	/* let's get rid of the sample data coming after the end of the loop */
+	if ((sample->flags & IT_SAMPLE_LOOP) && sample->loop_end < sample->length) {
+		truncated_size = sample->length - sample->loop_end;
+		sample->length = sample->loop_end;
+	} else {
+		truncated_size = 0;
+	}
+
+	/* A negative length would turn into an enormous size_t below; reject
+	 * it explicitly rather than relying on malloc() to fail.
+	 */
+	if (sample->length < 0)
+		return -1;
+
+	/* malloc(0) may legitimately return NULL, which must not be mistaken
+	 * for running out of memory, so always ask for at least one byte.
+	 */
+	sample->data = malloc(sample->length ? (size_t)sample->length : 1);
+
+	if (!sample->data)
+		return -1;
+
+	dumbfile_getnc((char *)sample->data, sample->length, f);
+	dumbfile_skip(f, truncated_size);
+
+	if (dumbfile_error(f))
+		return -1;
+
+	for (i = 0; i < sample->length; i++)
+		((signed char *)sample->data)[i] ^= 0x80;
+
+	return 0;
+}
+
+/* Parse a MultiTracker (MTM) module from f into a newly allocated
+ * DUMB_IT_SIGDATA.
+ *
+ * f       - open file positioned at the start of the module.
+ * version - receives the byte that follows the "MTM" signature. It is
+ *           written as soon as the signature matches, even if loading
+ *           fails later.
+ *
+ * Returns the populated sigdata, or NULL if the signature does not match,
+ * the header values are out of range, a read comes up short or an
+ * allocation fails. All intermediate buffers are released on every path.
+ *
+ * The song comment is stored in the file as lines of 40 bytes. It is
+ * turned into a CR/LF separated, NUL-terminated song_message containing
+ * every line up to the last one whose first byte is non-zero. The final
+ * line is bounded by the comment length taken from the header, so a length
+ * that is not a multiple of 40 cannot cause reads past the buffer.
+ */
+static DUMB_IT_SIGDATA *it_mtm_load_sigdata(DUMBFILE *f, int * version)
+{
+	DUMB_IT_SIGDATA *sigdata;
+
+	int n, o, n_tracks, l_comment, n_rows, n_channels;
+
+	unsigned char * track;
+
+	unsigned short * sequence;
+
+	char * comment;
+
+	if (dumbfile_getc(f) != 'M' ||
+		dumbfile_getc(f) != 'T' ||
+		dumbfile_getc(f) != 'M') goto error;
+
+	*version = dumbfile_getc(f);
+
+	sigdata = malloc(sizeof(*sigdata));
+	if (!sigdata) goto error;
+
+	dumbfile_getnc((char *)sigdata->name, 20, f);
+	sigdata->name[20] = 0;
+
+	n_tracks = dumbfile_igetw(f);
+	sigdata->n_patterns = dumbfile_getc(f) + 1;
+	sigdata->n_orders = dumbfile_getc(f) + 1;
+	l_comment = dumbfile_igetw(f);
+	sigdata->n_samples = dumbfile_getc(f);
+	//if (dumbfile_getc(f)) goto error_sd;
+	dumbfile_getc(f);
+	n_rows = dumbfile_getc(f);
+	n_channels = dumbfile_getc(f);
+
+	/* One error check covers every header read above. */
+	if (dumbfile_error(f) ||
+		(n_tracks <= 0) ||
+		(sigdata->n_samples <= 0) ||
+		(n_rows <= 0 || n_rows > 64) ||
+		(n_channels <= 0 || n_channels > 32)) goto error_sd;
+
+	memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS);
+
+	if (dumbfile_getnc((char *)sigdata->channel_pan, 32, f) < 32) goto error_sd;
+
+	/* Pan bytes 0..15 are rescaled; anything larger silences the channel. */
+	for (n = 0; n < 32; n++) {
+		if (sigdata->channel_pan[n] <= 15) {
+			sigdata->channel_pan[n] -= (sigdata->channel_pan[n] & 8) >> 3;
+			sigdata->channel_pan[n] = (sigdata->channel_pan[n] * 32) / 7;
+		} else {
+			sigdata->channel_volume[n] = 0;
+			sigdata->channel_pan[n] = 7;
+		}
+	}
+
+	/* Channels beyond the 32 described by the file get the default
+	 * panning pattern.
+	 */
+	for (n = 32; n < DUMB_IT_N_CHANNELS; n += 4) {
+		int sep = 32 * dumb_it_default_panning_separation / 100;
+		sigdata->channel_pan[n  ] = 32 - sep;
+		sigdata->channel_pan[n+1] = 32 + sep;
+		sigdata->channel_pan[n+2] = 32 + sep;
+		sigdata->channel_pan[n+3] = 32 - sep;
+	}
+
+	sigdata->sample = malloc(sigdata->n_samples * sizeof(*sigdata->sample));
+	if (!sigdata->sample) goto error_sd;
+
+	sigdata->flags = IT_WAS_AN_XM | IT_WAS_A_MOD | IT_STEREO | IT_OLD_EFFECTS | IT_COMPATIBLE_GXX;
+
+	sigdata->global_volume = 128;
+	sigdata->mixing_volume = 48;
+	sigdata->speed = 6;
+	sigdata->tempo = 125;
+	sigdata->pan_separation = 128;
+
+	/* From here on failures go through _dumb_it_unload_sigdata(), so every
+	 * pointer it may look at has to be valid or NULL.
+	 */
+	sigdata->song_message = NULL;
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->n_instruments = 0;
+
+	sigdata->restart_position = 0;
+	sigdata->n_pchannels = n_channels;
+
+	for (n = 0; n < sigdata->n_samples; n++)
+		sigdata->sample[n].data = NULL;
+
+	for (n = 0; n < sigdata->n_samples; n++) {
+		if (it_mtm_read_sample_header(&sigdata->sample[n], f)) goto error_usd;
+	}
+
+	sigdata->order = malloc(sigdata->n_orders);
+	if (!sigdata->order) goto error_usd;
+
+	/* The order table occupies 128 bytes in the file regardless of how
+	 * many entries are used.
+	 */
+	if (dumbfile_getnc((char *)sigdata->order, sigdata->n_orders, f) < sigdata->n_orders) goto error_usd;
+	if (sigdata->n_orders < 128)
+		if (dumbfile_skip(f, 128 - sigdata->n_orders)) goto error_usd;
+
+	track = malloc(192 * n_tracks);
+	if (!track) goto error_usd;
+
+	if (dumbfile_getnc((char *)track, 192 * n_tracks, f) < 192 * n_tracks) goto error_ft;
+
+	sigdata->pattern = malloc(sigdata->n_patterns * sizeof(*sigdata->pattern));
+	if (!sigdata->pattern) goto error_ft;
+	for (n = 0; n < sigdata->n_patterns; n++)
+		sigdata->pattern[n].entry = NULL;
+
+	sequence = malloc(sigdata->n_patterns * 32 * sizeof(*sequence));
+	if (!sequence) goto error_ft;
+
+	for (n = 0; n < sigdata->n_patterns; n++) {
+		for (o = 0; o < 32; o++) {
+			sequence[(n * 32) + o] = dumbfile_igetw(f);
+			if (sequence[(n * 32) + o] > n_tracks)
+			{
+				//goto error_fs;
+				// illegal track number, silence instead of rejecting the file
+				sequence[(n * 32) + o] = 0;
+			}
+		}
+	}
+
+	for (n = 0; n < sigdata->n_patterns; n++) {
+		if (it_mtm_assemble_pattern(&sigdata->pattern[n], track, &sequence[n * 32], n_rows)) goto error_fs;
+	}
+
+	if (l_comment) {
+		/* One spare byte, set to NUL below, so that scanning a line that
+		 * fills the remainder of the comment always stops inside the
+		 * allocation.
+		 */
+		comment = malloc(l_comment + 1);
+		if (!comment) goto error_fs;
+		if (dumbfile_getnc(comment, l_comment, f) < l_comment) goto error_fc;
+		comment[l_comment] = 0;
+
+		/* Time for annoying "logic", yes. We want each line which has text,
+		 * and each blank line in between all the valid lines.
+		 */
+
+		/* Find last actual line. */
+		for (o = -1, n = 0; n < l_comment; n += 40) {
+			if (comment[n]) o = n;
+		}
+
+		if (o >= 0) {
+
+			size_t l;
+			int m;
+
+			/* First pass: total size, allowing two bytes after every line
+			 * (CR LF, or just the final NUL for the last one).
+			 */
+			for (l = 0, n = 0; n <= o; n += 40) {
+				/* The last line may be shorter than 40 bytes when
+				 * l_comment is not a multiple of 40.
+				 */
+				int width = (l_comment - n < 40) ? (l_comment - n) : 40;
+				l += strlen_max(&comment[n], width) + 2;
+			}
+
+			l -= 1;
+
+			sigdata->song_message = malloc(l);
+			if (!sigdata->song_message) goto error_fc;
+
+			/* Second pass: copy the lines, using exactly the same bound as
+			 * the sizing pass above.
+			 */
+			for (m = 0, n = 0; n <= o; n += 40) {
+				int width = (l_comment - n < 40) ? (l_comment - n) : 40;
+				int p = (int)strlen_max(&comment[n], width);
+				if (p) {
+					memcpy(sigdata->song_message + m, &comment[n], p);
+					m += p;
+				}
+				/* No line break after the last line. */
+				if (l - m > 1) {
+					sigdata->song_message[m++] = 13;
+					sigdata->song_message[m++] = 10;
+				}
+			}
+
+			sigdata->song_message[m] = 0;
+		}
+
+		free(comment);
+	}
+
+	for (n = 0; n < sigdata->n_samples; n++) {
+		if (it_mtm_read_sample_data(&sigdata->sample[n], f)) goto error_fs;
+	}
+
+	_dumb_it_fix_invalid_orders(sigdata);
+
+	free(sequence);
+	free(track);
+
+	return sigdata;
+
+	/* Cleanup ladder: each label releases one more resource than the one
+	 * below it.
+	 */
+error_fc:
+	free(comment);
+error_fs:
+	free(sequence);
+error_ft:
+	free(track);
+error_usd:
+	_dumb_it_unload_sigdata(sigdata);
+	return NULL;
+
+error_sd:
+	free(sigdata);
+error:
+	return NULL;
+}
+
+/* Map a value to a single upper-case hexadecimal character: values below
+ * 10 are offset from '0', everything else is offset from 'A' - 10. No
+ * range check is performed on the argument.
+ */
+static char hexdigit(int in)
+{
+	return (in < 10) ? in + '0' : in + 'A' - 10;
+}
+
+/* Load an MTM file from f and wrap it in a DUH.
+ *
+ * The resulting DUH carries two tags: "TITLE" (the song name stored in the
+ * sigdata) and "FORMAT", a string of the form "MTM vH.L" where H and L are
+ * the high and low nibbles of the version byte reported by
+ * it_mtm_load_sigdata(). Returns NULL if the module could not be loaded;
+ * otherwise returns whatever make_duh() returns.
+ */
+DUH *DUMBEXPORT dumb_read_mtm_quick(DUMBFILE *f)
+{
+	static const char prefix[] = "MTM v";
+	DUH_SIGTYPE_DESC *descptr = &_dumb_sigtype_it;
+	sigdata_t *sigdata;
+	const char *tag[2][2];
+	char version[16];
+	int ver;
+
+	sigdata = it_mtm_load_sigdata(f, &ver);
+	if (sigdata == NULL)
+		return NULL;
+
+	/* "MTM v" followed by <high nibble> '.' <low nibble> and a NUL. */
+	memcpy(version, prefix, sizeof(prefix) - 1);
+	version[5] = hexdigit(ver >> 4);
+	version[6] = '.';
+	version[7] = hexdigit(ver & 15);
+	version[8] = 0;
+
+	tag[0][0] = "TITLE";
+	tag[0][1] = (const char *)(((DUMB_IT_SIGDATA *)sigdata)->name);
+	tag[1][0] = "FORMAT";
+	tag[1][1] = (const char *)version;
+
+	return make_duh(-1, 2, (const char *const (*)[2])tag, 1, &descptr, &sigdata);
+}
diff --git a/libraries/dumb/src/it/readokt.c b/libraries/dumb/src/it/readokt.c
new file mode 100644
index 000000000..c1dc1ce13
--- /dev/null
+++ b/libraries/dumb/src/it/readokt.c
@@ -0,0 +1,558 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readokt.c - Code to read an Oktalyzer module       / / \  \
+ *             from an open file.                    | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Chris Moeller.                                  \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* Convert one Oktalyzer pattern body into an IT pattern.
+ *
+ * pattern    - destination; n_rows, n_entries and entry are filled in.
+ * data       - pattern body: a big-endian 16-bit row count (0 is taken to
+ *              mean 64) followed by four bytes per channel per row:
+ *              note, instrument, effect command, effect parameter.
+ * length     - number of bytes available at data.
+ * n_channels - number of channels stored per row.
+ *
+ * A cell produces an entry when its note byte or its command byte is
+ * non-zero. Returns 0 on success, or -1 if the data is too short for the
+ * declared row count or the entry array cannot be allocated.
+ */
+static int it_okt_read_pattern(IT_PATTERN *pattern, const unsigned char *data, int length, int n_channels)
+{
+	const unsigned char *cell;
+	IT_ENTRY *entry;
+	int n_rows;
+	int row;
+	int channel;
+
+	if (length < 2) return -1;
+
+	n_rows = (data[0] << 8) | data[1];
+	if (n_rows == 0) n_rows = 64;
+
+	if (length < 2 + (n_rows * n_channels * 4)) return -1;
+
+	pattern->n_rows = n_rows;
+
+	/* First pass: one end-of-row marker per row plus one entry per
+	 * occupied cell.
+	 */
+	pattern->n_entries = n_rows;
+	cell = data + 2;
+	for (row = 0; row < n_rows; row++) {
+		for (channel = 0; channel < n_channels; channel++, cell += 4) {
+			if (cell[0] | cell[2])
+				pattern->n_entries++;
+		}
+	}
+
+	pattern->entry = (IT_ENTRY *) malloc(pattern->n_entries * sizeof(*pattern->entry));
+	if (!pattern->entry)
+		return -1;
+
+	/* Second pass: fill the entries in. */
+	entry = pattern->entry;
+	cell = data + 2;
+	for (row = 0; row < n_rows; row++) {
+		for (channel = 0; channel < n_channels; channel++, cell += 4) {
+			const unsigned char note = cell[0];
+			const unsigned char command = cell[2];
+			const unsigned char param = cell[3];
+
+			if (!(note | command))
+				continue;
+
+			entry->channel = channel;
+			entry->mask = 0;
+
+			if (note > 0 && note <= 36) {
+				entry->mask |= IT_ENTRY_NOTE | IT_ENTRY_INSTRUMENT;
+				entry->note = note + 35;
+				entry->instrument = cell[1] + 1;
+			}
+
+			entry->effect = 0;
+			entry->effectvalue = param;
+
+			switch (command) {
+			/* Slides and arpeggios only take effect with a non-zero
+			 * parameter.
+			 */
+			case 1:
+				if (param != 0) entry->effect = IT_PORTAMENTO_UP;
+				break;
+			case 2:
+				if (param != 0) entry->effect = IT_PORTAMENTO_DOWN;
+				break;
+			case 10:
+				if (param != 0) entry->effect = IT_OKT_ARPEGGIO_3;
+				break;
+			case 11:
+				if (param != 0) entry->effect = IT_OKT_ARPEGGIO_4;
+				break;
+			case 12:
+				if (param != 0) entry->effect = IT_OKT_ARPEGGIO_5;
+				break;
+			case 13:
+				if (param != 0) entry->effect = IT_OKT_NOTE_SLIDE_DOWN;
+				break;
+			case 17:
+				if (param != 0) entry->effect = IT_OKT_NOTE_SLIDE_UP;
+				break;
+			case 21:
+				if (param != 0) entry->effect = IT_OKT_NOTE_SLIDE_DOWN_ROW;
+				break;
+			case 30:
+				if (param != 0) entry->effect = IT_OKT_NOTE_SLIDE_UP_ROW;
+				break;
+
+			case 15:
+				entry->effect = IT_S;
+				entry->effectvalue = EFFECT_VALUE(IT_S_SET_FILTER, param & 0x0F);
+				break;
+
+			case 25:
+				entry->effect = IT_JUMP_TO_ORDER;
+				break;
+
+			case 27:
+				entry->note = IT_NOTE_OFF;
+				entry->mask |= IT_ENTRY_NOTE;
+				break;
+
+			case 28:
+				entry->effect = IT_SET_SPEED;
+				break;
+
+			case 31:
+				/* The parameter range selects between an absolute volume
+				 * and four slide variants; values above 0x80 do nothing.
+				 */
+				if (param <= 0x40) {
+					entry->effect = IT_SET_CHANNEL_VOLUME;
+				} else if (param <= 0x50) {
+					entry->effect = IT_OKT_VOLUME_SLIDE_DOWN;
+					entry->effectvalue = param - 0x40;
+				} else if (param <= 0x60) {
+					entry->effect = IT_OKT_VOLUME_SLIDE_UP;
+					entry->effectvalue = param - 0x50;
+				} else if (param <= 0x70) {
+					entry->effect = IT_OKT_VOLUME_SLIDE_DOWN;
+					entry->effectvalue = param - 0x50;
+				} else if (param <= 0x80) {
+					entry->effect = IT_OKT_VOLUME_SLIDE_UP;
+					entry->effectvalue = param - 0x60;
+				}
+				break;
+			}
+
+			if (entry->effect)
+				entry->mask |= IT_ENTRY_EFFECT;
+
+			entry++;
+		}
+		IT_SET_END_ROW(entry);
+		entry++;
+	}
+
+	return 0;
+}
+
+
+
+/* Fill in sample from one 32-byte Oktalyzer sample record at data.
+ *
+ * Layout used here: bytes 0-19 name, bytes 20-23 length (big-endian),
+ * bytes 24-25 loop start and bytes 26-27 loop length (both big-endian and
+ * doubled), byte 29 default volume.
+ *
+ * A sample whose length is not positive gets flags == 0 and nothing
+ * beyond name, filename, length, volumes and the sustain loop points is
+ * set. Otherwise the sample is marked as existing, the sustain loop end is
+ * clamped to the length and the sustain loop is enabled when the loop
+ * length exceeds 2.
+ */
+static void it_okt_read_sample_header(IT_SAMPLE *sample, const unsigned char * data)
+{
+	int loop_start, loop_length;
+
+	memcpy(sample->name, data, 20);
+	sample->name[20] = 0;
+
+	sample->filename[0] = 0;
+
+	/* Assemble the length in unsigned arithmetic: shifting a promoted
+	 * (signed) int left by 24 is undefined when the top byte is 0x80 or
+	 * more. Converting back to int keeps such values negative, so they
+	 * are still rejected by the length check below.
+	 */
+	sample->length = (int)(((unsigned int)data[20] << 24) |
+	                       ((unsigned int)data[21] << 16) |
+	                       ((unsigned int)data[22] << 8) |
+	                        (unsigned int)data[23]);
+	sample->global_volume = 64;
+	sample->default_volume = data[29];
+	loop_start = ((data[24] << 8) | data[25]) << 1;
+	loop_length = ((data[26] << 8) | data[27]) << 1;
+	sample->sus_loop_start = loop_start;
+	sample->sus_loop_end = loop_start + loop_length;
+
+	if (sample->length <= 0) {
+		sample->flags = 0;
+		return;
+	}
+
+	sample->flags = IT_SAMPLE_EXISTS;
+
+	sample->default_pan = 0;
+	sample->C5_speed = (int)( AMIGA_CLOCK / 214.0 );
+	sample->finetune = 0;
+
+	if (sample->sus_loop_end > sample->length)
+		sample->sus_loop_end = sample->length;
+
+	if (loop_length > 2)
+		sample->flags |= IT_SAMPLE_SUS_LOOP;
+
+	sample->vibrato_speed = 0;
+	sample->vibrato_depth = 0;
+	sample->vibrato_rate = 0;
+	sample->vibrato_waveform = 0;
+	sample->max_resampling_quality = -1;
+}
+
+
+
+/* Attach a copy of length bytes from data to sample.
+ *
+ * Nothing happens (and 0 is returned) when either length or
+ * sample->length is zero. If fewer bytes are supplied than the header
+ * announced, sample->length is lowered to length and the sustain loop end
+ * is clamped with it. The allocation and the copy always use length.
+ *
+ * Returns 0 on success or -1 if the buffer cannot be allocated.
+ */
+static int it_okt_read_sample_data(IT_SAMPLE *sample, const char * data, int length)
+{
+	if (!length || !sample->length)
+		return 0;
+
+	if (length < sample->length) {
+		sample->length = length;
+		if (length < sample->sus_loop_end)
+			sample->sus_loop_end = length;
+	}
+
+	sample->data = malloc(length);
+	if (sample->data == NULL)
+		return -1;
+
+	memcpy(sample->data, data, length);
+
+	return 0;
+}
+
+
+
+typedef struct IFF_CHUNK IFF_CHUNK;
+typedef struct IFF_CHUNKED IFF_CHUNKED;
+
+/* One chunk read from an Oktalyzer file by dumbfile_read_okt(). */
+struct IFF_CHUNK
+{
+	/* Four-byte chunk identifier as returned by dumbfile_mgetl(); callers
+	 * compare it against DUMB_ID(...) values.
+	 */
+	unsigned type;
+	/* Heap buffer holding the chunk payload; released by free_okt(). */
+	unsigned char * data;
+	/* Number of payload bytes held in data. This is lowered from the size
+	 * stated in the file when the final chunk is cut short.
+	 */
+	unsigned size;
+};
+
+/* All chunks of a file, in the order they were read. */
+struct IFF_CHUNKED
+{
+	/* Number of valid elements in chunks. */
+	unsigned chunk_count;
+	/* Heap array of chunk_count chunks, grown with realloc(). */
+	IFF_CHUNK * chunks;
+};
+
+
+
+/* Read every chunk that follows the file signature into memory.
+ *
+ * Each chunk is a four-byte type and a four-byte size (both read with
+ * dumbfile_mgetl) followed by size bytes of payload. Reading stops at the
+ * first header that cannot be read. A final chunk whose payload is cut
+ * short is kept, with its size lowered to what was actually read, as long
+ * as at least one byte arrived.
+ *
+ * Returns the chunk list, to be released with free_okt(), or NULL if no
+ * chunk could be read or an allocation failed. On failure everything read
+ * so far, including the payloads of earlier chunks, is released.
+ */
+static IFF_CHUNKED *dumbfile_read_okt(DUMBFILE *f)
+{
+	unsigned i;
+	IFF_CHUNKED *mod = (IFF_CHUNKED *) malloc(sizeof(*mod));
+	if (!mod) return NULL;
+
+	mod->chunk_count = 0;
+	mod->chunks = 0;
+
+	for (;;)
+	{
+		long bytes_read;
+		/* Grow by one slot up front; the slot only counts once the chunk
+		 * has been read.
+		 */
+		IFF_CHUNK * chunk = ( IFF_CHUNK * ) realloc( mod->chunks, ( mod->chunk_count + 1 ) * sizeof( IFF_CHUNK ) );
+		if ( !chunk ) goto fail;
+		mod->chunks = chunk;
+		chunk += mod->chunk_count;
+
+		bytes_read = dumbfile_mgetl( f );
+		if ( bytes_read < 0 ) break;
+
+		chunk->type = bytes_read;
+		chunk->size = dumbfile_mgetl( f );
+
+		if ( dumbfile_error( f ) ) break;
+
+		/* malloc(0) may legitimately return NULL, which must not be
+		 * mistaken for running out of memory on an empty chunk.
+		 */
+		chunk->data = (unsigned char *) malloc( chunk->size ? chunk->size : 1 );
+		if ( !chunk->data ) goto fail;
+
+		bytes_read = dumbfile_getnc( ( char * ) chunk->data, chunk->size, f );
+		if ( bytes_read < (long)chunk->size )
+		{
+			if ( bytes_read <= 0 ) {
+				/* Nothing usable: drop this chunk. */
+				free( chunk->data );
+				break;
+			} else {
+				/* Partial payload: keep what we got and stop. */
+				chunk->size = bytes_read;
+				mod->chunk_count++;
+				break;
+			}
+		}
+
+		mod->chunk_count++;
+	}
+
+	if ( !mod->chunk_count ) {
+		free(mod->chunks);
+		free(mod);
+		mod = NULL;
+	}
+
+	return mod;
+
+fail:
+	/* Only the first chunk_count slots own a payload; the slot being
+	 * filled when the failure happened does not.
+	 */
+	for (i = 0; i < mod->chunk_count; i++)
+		free( mod->chunks[i].data );
+	free( mod->chunks );
+	free( mod );
+	return NULL;
+}
+
+/* Release a chunk list returned by dumbfile_read_okt(): every chunk
+ * payload, the chunk array and the list itself. A null mod is ignored.
+ */
+void free_okt(IFF_CHUNKED * mod)
+{
+	unsigned idx;
+
+	if (!mod)
+		return;
+
+	if (mod->chunks) {
+		/* free() accepts a null pointer, so empty payloads need no test. */
+		for (idx = 0; idx < mod->chunk_count; idx++)
+			free(mod->chunks[idx].data);
+		free(mod->chunks);
+	}
+
+	free(mod);
+}
+
+/* Find a chunk by type.
+ *
+ * mod    - chunk list to search; may be null.
+ * type   - chunk identifier to look for.
+ * offset - how many matching chunks to pass over first, so 0 selects the
+ *          first chunk of that type, 1 the second, and so on.
+ *
+ * Returns a pointer into mod's chunk array, or NULL when there is no such
+ * chunk.
+ */
+const IFF_CHUNK * get_chunk_by_type(IFF_CHUNKED * mod, unsigned type, unsigned offset)
+{
+	unsigned idx;
+
+	if (!mod || !mod->chunks)
+		return NULL;
+
+	for (idx = 0; idx < mod->chunk_count; idx++) {
+		if (mod->chunks[idx].type != type)
+			continue;
+		if (offset == 0)
+			return &mod->chunks[idx];
+		offset--;
+	}
+
+	return NULL;
+}
+
+/* Count the chunks in mod whose type equals type. A null list, or one
+ * without a chunk array, counts as empty.
+ */
+unsigned get_chunk_count(IFF_CHUNKED *mod, unsigned type)
+{
+	unsigned idx;
+	unsigned matches = 0;
+
+	if (!mod || !mod->chunks)
+		return 0;
+
+	for (idx = 0; idx < mod->chunk_count; idx++)
+		matches += (mod->chunks[idx].type == type) ? 1 : 0;
+
+	return matches;
+}
+
+
+/* Builds a DUMB_IT_SIGDATA from an Oktalyzer module read from f.
+ *
+ * The stream must start with the 8-byte "OKTASONG" signature; the rest is
+ * parsed into chunks by dumbfile_read_okt() and the SPEE, SAMP, CMOD, PLEN,
+ * PATT, SLEN, PBOD and SBOD chunks are then consumed in turn.
+ *
+ * Returns the new sigdata, or NULL if the signature does not match, a
+ * required chunk is missing or too short, the module declares no channels,
+ * orders or patterns, or an allocation or sub-parser fails. Nothing is
+ * left allocated on failure.
+ */
+static DUMB_IT_SIGDATA *it_okt_load_sigdata(DUMBFILE *f)
+{
+	DUMB_IT_SIGDATA *sigdata;
+	IFF_CHUNKED *mod;
+	const IFF_CHUNK *chunk;
+	char signature[8];
+	int n_channels = 0;
+	int slot, mode, pan;
+	int n, used, available;
+
+	if (dumbfile_getnc(signature, 8, f) < 8)
+		return NULL;
+	if (memcmp(signature, "OKTASONG", 8) != 0)
+		return NULL;
+
+	mod = dumbfile_read_okt(f);
+	if (!mod)
+		return NULL;
+
+	sigdata = (DUMB_IT_SIGDATA *) malloc(sizeof(*sigdata));
+	if (!sigdata) {
+		free_okt(mod);
+		return NULL;
+	}
+
+	sigdata->name[0] = 0;
+
+	/* SPEE: initial speed, one big-endian 16-bit word. */
+	chunk = get_chunk_by_type(mod, DUMB_ID('S','P','E','E'), 0);
+	if (!chunk || chunk->size < 2)
+		goto fail_plain;
+	sigdata->speed = (chunk->data[0] << 8) | chunk->data[1];
+
+	/* SAMP: a table of 32-byte sample headers. */
+	chunk = get_chunk_by_type(mod, DUMB_ID('S','A','M','P'), 0);
+	if (!chunk || chunk->size < 32)
+		goto fail_plain;
+	sigdata->n_samples = chunk->size / 32;
+
+	/* CMOD: four big-endian words; a 0 contributes one channel and a 1
+	 * contributes two. Any other value contributes none.
+	 */
+	chunk = get_chunk_by_type(mod, DUMB_ID('C','M','O','D'), 0);
+	if (!chunk || chunk->size < 8)
+		goto fail_plain;
+
+	for (slot = 0; slot < 4; slot++) {
+		mode = (chunk->data[slot * 2] << 8) | chunk->data[slot * 2 + 1];
+		if (mode == 0)
+			n_channels += 1;
+		else if (mode == 1)
+			n_channels += 2;
+	}
+
+	if (n_channels == 0)
+		goto fail_plain;
+
+	sigdata->n_pchannels = n_channels;
+
+	sigdata->sample = (IT_SAMPLE *) malloc(sigdata->n_samples * sizeof(*sigdata->sample));
+	if (!sigdata->sample)
+		goto fail_plain;
+
+	/* From here on every owned pointer is either valid or NULL, so the
+	 * generic unloader can be used for cleanup.
+	 */
+	sigdata->song_message = NULL;
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->n_instruments = 0;
+
+	for (n = 0; (unsigned)n < (unsigned)sigdata->n_samples; n++)
+		sigdata->sample[n].data = NULL;
+
+	chunk = get_chunk_by_type(mod, DUMB_ID('S','A','M','P'), 0);
+
+	for (n = 0; (unsigned)n < (unsigned)sigdata->n_samples; n++)
+		it_okt_read_sample_header(&sigdata->sample[n], chunk->data + 32 * n);
+
+	sigdata->restart_position = 0;
+
+	/* PLEN: number of entries in the order list. */
+	chunk = get_chunk_by_type(mod, DUMB_ID('P','L','E','N'), 0);
+	if (!chunk || chunk->size < 2)
+		goto fail_unload;
+
+	sigdata->n_orders = (chunk->data[0] << 8) | chunk->data[1];
+
+	/* Order lists that are empty or longer than 128 entries are rejected. */
+	if (sigdata->n_orders <= 0 || sigdata->n_orders > 128)
+		goto fail_unload;
+
+	/* PATT: the order list itself, one byte per entry. */
+	chunk = get_chunk_by_type(mod, DUMB_ID('P','A','T','T'), 0);
+	if (!chunk || chunk->size < (unsigned)sigdata->n_orders)
+		goto fail_unload;
+
+	sigdata->order = (unsigned char *) malloc(sigdata->n_orders);
+	if (!sigdata->order)
+		goto fail_unload;
+
+	memcpy(sigdata->order, chunk->data, sigdata->n_orders);
+
+	/* SLEN: declared pattern count, capped at the number of PBOD chunks
+	 * actually present.
+	 */
+	chunk = get_chunk_by_type(mod, DUMB_ID('S','L','E','N'), 0);
+	if (!chunk || chunk->size < 2)
+		goto fail_unload;
+
+	sigdata->n_patterns = (chunk->data[0] << 8) | chunk->data[1];
+
+	available = get_chunk_count(mod, DUMB_ID('P','B','O','D'));
+	if (sigdata->n_patterns > (int)available)
+		sigdata->n_patterns = (int)available;
+
+	if (!sigdata->n_patterns)
+		goto fail_unload;
+
+	sigdata->pattern = (IT_PATTERN *) malloc(sigdata->n_patterns * sizeof(*sigdata->pattern));
+	if (!sigdata->pattern)
+		goto fail_unload;
+
+	/* Clear every entry pointer first so a failure part-way through the
+	 * pattern reads below can still be unloaded.
+	 */
+	for (n = 0; (unsigned)n < (unsigned)sigdata->n_patterns; n++)
+		sigdata->pattern[n].entry = NULL;
+
+	for (n = 0; (unsigned)n < (unsigned)sigdata->n_patterns; n++) {
+		chunk = get_chunk_by_type(mod, DUMB_ID('P','B','O','D'), n);
+		if (it_okt_read_pattern(&sigdata->pattern[n], chunk->data, chunk->size, n_channels) != 0)
+			goto fail_unload;
+	}
+
+	/* SBOD chunks are handed out, in order, to the samples flagged as
+	 * existing. Samples not reached before the chunks run out have their
+	 * flags cleared.
+	 */
+	available = get_chunk_count(mod, DUMB_ID('S','B','O','D'));
+	used = 0;
+	for (n = 0; (unsigned)n < (unsigned)sigdata->n_samples && used < available; n++) {
+		if (!(sigdata->sample[n].flags & IT_SAMPLE_EXISTS))
+			continue;
+		chunk = get_chunk_by_type(mod, DUMB_ID('S','B','O','D'), used);
+		if (it_okt_read_sample_data(&sigdata->sample[n], (const char *)chunk->data, chunk->size))
+			goto fail_unload;
+		used++;
+	}
+	for (; (unsigned)n < (unsigned)sigdata->n_samples; n++)
+		sigdata->sample[n].flags = 0;
+
+	/* Walk CMOD again to assign pans: channels from slots 1 and 2 get 48,
+	 * channels from slots 0 and 3 get 16.
+	 */
+	chunk = get_chunk_by_type(mod, DUMB_ID('C','M','O','D'), 0);
+
+	n = 0;
+	for (slot = 0; n < n_channels && slot < 4; slot++) {
+		mode = (chunk->data[slot * 2] << 8) | chunk->data[slot * 2 + 1];
+		pan = (slot == 1 || slot == 2) ? 48 : 16;
+		if (mode == 0) {
+			sigdata->channel_pan[n++] = pan;
+		}
+		else if (mode == 1) {
+			sigdata->channel_pan[n++] = pan;
+			sigdata->channel_pan[n++] = pan;
+		}
+	}
+
+	free_okt(mod);
+
+	/* Everything else takes fixed defaults. */
+	sigdata->flags = IT_WAS_AN_OKT | IT_WAS_AN_XM | IT_WAS_A_MOD | IT_OLD_EFFECTS | IT_COMPATIBLE_GXX | IT_STEREO;
+
+	sigdata->global_volume = 128;
+	sigdata->mixing_volume = 48;
+	/* We want 50 ticks per second; 50/6 row advances per second;
+	 * 50*10=500 row advances per minute; 500/4=125 beats per minute.
+	 */
+	sigdata->tempo = 125;
+	sigdata->pan_separation = 128;
+
+	memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS);
+	memset(sigdata->channel_pan + n_channels, 32, DUMB_IT_N_CHANNELS - n_channels);
+
+	_dumb_it_fix_invalid_orders(sigdata);
+
+	return sigdata;
+
+fail_unload:
+	/* sigdata owns sub-allocations: release them through the unloader. */
+	_dumb_it_unload_sigdata(sigdata);
+	free_okt(mod);
+	return NULL;
+
+fail_plain:
+	/* Only the bare sigdata struct has been allocated so far. */
+	free(sigdata);
+	free_okt(mod);
+	return NULL;
+}
+
+
+
+/* Loads an Oktalyzer module from f and wraps it in a DUH carrying a single
+ * "FORMAT" = "Oktalyzer" tag. Returns NULL if the module cannot be loaded.
+ */
+DUH *DUMBEXPORT dumb_read_okt_quick(DUMBFILE *f)
+{
+	DUH_SIGTYPE_DESC *desc = &_dumb_sigtype_it;
+	const char *tags[1][2];
+	sigdata_t *sigdata = it_okt_load_sigdata(f);
+
+	if (sigdata == NULL)
+		return NULL;
+
+	tags[0][0] = "FORMAT";
+	tags[0][1] = "Oktalyzer";
+
+	return make_duh(-1, 1, (const char *const (*)[2])tags, 1, &desc, &sigdata);
+}
diff --git a/libraries/dumb/src/it/readokt2.c b/libraries/dumb/src/it/readokt2.c
new file mode 100644
index 000000000..ef54b8d0d
--- /dev/null
+++ b/libraries/dumb/src/it/readokt2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readokt2.c - Function to read an Oktalyzer         / / \  \
+ *              module from an open file and do      | <  /   \_
+ *              an initial run-through.              |  \/ /\   /
+ *                                                    \_  /  > /
+ *                                                      | \ / /
+ * By Chris Moeller.                                    |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+
+/* Reads an Oktalyzer module via dumb_read_okt_quick() and then performs the
+ * initial run-through on the result. The run-through is invoked even when
+ * loading failed, so it receives whatever dumb_read_okt_quick() returned.
+ */
+DUH *DUMBEXPORT dumb_read_okt(DUMBFILE *f)
+{
+	DUH *loaded;
+
+	loaded = dumb_read_okt_quick(f);
+	dumb_it_do_initial_runthrough(loaded);
+
+	return loaded;
+}
diff --git a/libraries/dumb/src/it/readoldpsm.c b/libraries/dumb/src/it/readoldpsm.c
new file mode 100644
index 000000000..3946568b8
--- /dev/null
+++ b/libraries/dumb/src/it/readoldpsm.c
@@ -0,0 +1,689 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readoldpsm.c - Code to read an old Protracker      / / \  \
+ *                Studio module from an open file.   | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Chris Moeller.                                  \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+/* qsort() comparator for raw 64-byte sample headers. Orders them by the
+ * little-endian 32-bit value held in bytes 37..40 of each header (the field
+ * the sample reader later uses as the file offset of the sample data).
+ *
+ * Returns a negative, zero or positive value as e1 sorts before, equal to
+ * or after e2.
+ */
+static int CDECL psm_sample_compare(const void *e1, const void *e2)
+{
+	const unsigned char * pa = e1;
+	const unsigned char * pb = e2;
+	/* Assemble the field in unsigned arithmetic so a top byte >= 0x80 does
+	 * not shift into the sign bit, then view it as int to keep the signed
+	 * ordering this comparator has always used.
+	 */
+	int a = (int)((unsigned int)pa[37] | ((unsigned int)pa[38] << 8) |
+	              ((unsigned int)pa[39] << 16) | ((unsigned int)pa[40] << 24));
+	int b = (int)((unsigned int)pb[37] | ((unsigned int)pb[38] << 8) |
+	              ((unsigned int)pb[39] << 16) | ((unsigned int)pb[40] << 24));
+	/* Compare instead of subtracting: a - b overflows for values of
+	 * opposite sign and large magnitude, giving qsort() an inconsistent
+	 * ordering.
+	 */
+	return (a > b) - (a < b);
+}
+
+/* Reads *num 64-byte sample headers from the current position of f, then
+ * seeks to and decodes the sample data each header refers to.
+ *
+ * sample: in/out pointer to the sample array. Headers address samples by
+ *         a 1-based number (1..255); if the highest number used exceeds
+ *         *num the array is grown with realloc() and *sample is updated.
+ *         Every element's data pointer must be NULL or heap-allocated on
+ *         entry.
+ * f:      source file; it is repositioned with dumbfile_seek().
+ * num:    in/out element count of *sample.
+ *
+ * Header flag bits as used below: 0x04 = 16-bit data, 0x08 = unsigned data
+ * (sign bit is flipped), 0x10 = plain PCM (otherwise delta-encoded),
+ * 0x20 = ping-pong loop, 0x80 = looped; a header with 0x01 or 0x40 set is
+ * marked as not existing.
+ *
+ * Returns 0 on success and -1 on a read, seek, allocation or validation
+ * failure. Sample buffers allocated before a failure stay attached to
+ * *sample for the caller to release.
+ */
+static int it_old_psm_read_samples(IT_SAMPLE ** sample, DUMBFILE * f, int * num)
+{
+	int n, o, count = *num, true_num, snum, offset, flags, finetune, delta;
+
+	unsigned char * buffer;
+	const unsigned char * sdata;
+	int32 sample_bytes;
+
+	/* Do not rely on what malloc(0) returns: no headers means no work. */
+	if (count < 0) goto error;
+	if (count == 0) return 0;
+
+	buffer = malloc(count * 64);
+	if (!buffer) goto error;
+
+	if (dumbfile_getnc((char *)buffer, count * 64, f) < count * 64) goto error_fb;
+
+	/* Find the highest sample number any header refers to. */
+	true_num = 0;
+
+	for (n = 0; n < count; n++) {
+		snum = buffer[(n * 64) + 45] | (buffer[(n * 64) + 46] << 8);
+		if ((snum < 1) || (snum > 255)) goto error_fb;
+		if (true_num < snum) true_num = snum;
+	}
+
+	if (true_num > count) {
+		IT_SAMPLE * meh = realloc(*sample, true_num * sizeof(*meh));
+		if (!meh) goto error_fb;
+		for (n = count; n < true_num; n++) {
+			meh[n].data = NULL;
+		}
+		*sample = meh;
+		*num = true_num;
+	}
+
+	/* Process headers in the order psm_sample_compare() defines. */
+	qsort(buffer, count, 64, &psm_sample_compare);
+
+	/* Clear the flags of the whole array, not just the first true_num
+	 * entries: when the highest sample number is below the header count
+	 * the tail would otherwise keep uninitialised flags.
+	 */
+	for (n = 0; n < *num; n++) {
+		(*sample)[n].flags = 0;
+	}
+
+	for (n = 0; n < count; n++) {
+		IT_SAMPLE * s;
+		snum = buffer[(n * 64) + 45] | (buffer[(n * 64) + 46] << 8);
+		s = &((*sample)[snum - 1]);
+		memcpy(s->filename, buffer + (n * 64), 13);
+		s->filename[13] = 0;
+		memcpy(s->name, buffer + (n * 64) + 13, 24);
+		s->name[24] = 0;
+		offset = buffer[(n * 64) + 37] | (buffer[(n * 64) + 38] << 8) |
+				 (buffer[(n * 64) + 39] << 16) | (buffer[(n * 64) + 40] << 24);
+		flags = buffer[(n * 64) + 47];
+		s->length = buffer[(n * 64) + 48] | (buffer[(n * 64) + 49] << 8) |
+					(buffer[(n * 64) + 50] << 16) | (buffer[(n * 64) + 51] << 24);
+		s->loop_start = buffer[(n * 64) + 52] | (buffer[(n * 64) + 53] << 8) |
+						(buffer[(n * 64) + 54] << 16) | (buffer[(n * 64) + 55] << 24);
+		s->loop_end = buffer[(n * 64) + 56] | (buffer[(n * 64) + 57] << 8) |
+					  (buffer[(n * 64) + 58] << 16) | (buffer[(n * 64) + 59] << 24);
+
+		if (s->length <= 0) continue;
+
+		finetune = buffer[(n * 64) + 60];
+		s->default_volume = buffer[(n * 64) + 61];
+		/* The header carries no pan value; give the field a defined one. */
+		s->default_pan = 0;
+		s->C5_speed = buffer[(n * 64) + 62] | (buffer[(n * 64) + 63] << 8);
+		if (finetune & 15) {
+			/* Low nibble is a signed 4-bit finetune, scaled by 32. */
+			finetune &= 15;
+			if (finetune >= 8) finetune -= 16;
+			s->finetune = finetune * 32;
+		}
+		else s->finetune = 0;
+
+		s->flags |= IT_SAMPLE_EXISTS;
+		if (flags & 0x41) {
+			s->flags &= ~IT_SAMPLE_EXISTS;
+			continue;
+		}
+		if (flags & 0x20) s->flags |= IT_SAMPLE_PINGPONG_LOOP;
+		if (flags & 4) s->flags |= IT_SAMPLE_16BIT;
+
+		if (flags & 0x80) {
+			s->flags |= IT_SAMPLE_LOOP;
+			if ((unsigned int)s->loop_end > (unsigned int)s->length)
+				s->loop_end = s->length;
+			else if ((unsigned int)s->loop_start >= (unsigned int)s->loop_end)
+				s->flags &= ~IT_SAMPLE_LOOP;
+			else
+				s->length = s->loop_end;
+		}
+
+		s->global_volume = 64;
+
+		s->vibrato_speed = 0;
+		s->vibrato_depth = 0;
+		s->vibrato_rate = 0;
+		s->vibrato_waveform = IT_VIBRATO_SINE;
+		s->max_resampling_quality = -1;
+
+		sample_bytes = s->length * ((flags & 4) ? 2 : 1);
+		/* Two headers may name the same sample; drop the buffer loaded
+		 * for the earlier one instead of leaking it.
+		 */
+		free(s->data);
+		s->data = malloc(sample_bytes);
+		if (!s->data) goto error_fb;
+
+		if (dumbfile_seek(f, offset, DFS_SEEK_SET) || dumbfile_getnc(s->data, sample_bytes, f) < sample_bytes) goto error_fb;
+		/* The raw bytes are converted in place; sdata aliases s->data. */
+		sdata = ( const unsigned char * ) s->data;
+
+		if (flags & 0x10) {
+			if (flags & 8) {
+				if (flags & 4) {
+					for (o = 0; o < s->length; o++)
+						((short *)s->data)[o] = (sdata[o * 2] | (sdata[(o * 2) + 1] << 8)) ^ 0x8000;
+				} else {
+					for (o = 0; o < s->length; o++)
+						((signed char *)s->data)[o] = sdata[o] ^ 0x80;
+				}
+			} else {
+				if (flags & 4) {
+					for (o = 0; o < s->length; o++)
+						((short *)s->data)[o] = sdata[o * 2] | (sdata[(o * 2) + 1] << 8);
+				}
+				/* Signed 8-bit PCM is already in its final form. The
+				 * buffer must not be memcpy()'d onto itself, which is
+				 * undefined, so there is nothing to do.
+				 */
+			}
+		} else {
+			delta = 0;
+			if (flags & 8) {
+				/* unsigned delta? mehhh, does anything even use this? */
+				if (flags & 4) {
+					for (o = 0; o < s->length; o++) {
+						delta += (short)(sdata[o * 2] | (sdata[(o * 2) + 1] << 8));
+						((short *)s->data)[o] = delta ^ 0x8000;
+					}
+				} else {
+					for (o = 0; o < s->length; o++) {
+						delta += (signed char)sdata[o];
+						((signed char *)s->data)[o] = delta ^ 0x80;
+					}
+				}
+			} else {
+				if (flags & 4) {
+					for (o = 0; o < s->length; o++) {
+						delta += (short)(sdata[o * 2] | (sdata[(o * 2) + 1] << 8));
+						((short *)s->data)[o] = delta;
+					}
+				} else {
+					for (o = 0; o < s->length; o++) {
+						delta += (signed char)sdata[o];
+						((signed char *)s->data)[o] = delta;
+					}
+				}
+			}
+		}
+	}
+
+	free(buffer);
+
+	return 0;
+
+error_fb:
+	free(buffer);
+error:
+	return -1;
+}
+
+/* Reads `size` bytes of packed pattern data from f and unpacks `num`
+ * patterns from it into pattern[0..num-1].
+ *
+ * Each packed pattern starts with a 4-byte header (16-bit little-endian
+ * packed size, row count, channel count) followed by events. An event
+ * begins with a flags byte: 0 ends the row; otherwise the low five bits are
+ * the channel and 0x80 / 0x40 / 0x20 announce note+instrument (2 bytes),
+ * volume (1 byte) and effect (2 bytes, or 4 for effect 40). Packed sizes
+ * are rounded up to a multiple of 16 to find the next pattern.
+ *
+ * pchans: notes on channels at or above this index are dropped.
+ *
+ * Returns 0 on success, -1 on a read or allocation failure or if a pattern
+ * header would lie outside the data. Entry arrays allocated before a
+ * failure stay attached to their patterns for the caller to release.
+ */
+static int it_old_psm_read_patterns(IT_PATTERN * pattern, DUMBFILE * f, int num, int size, int pchans)
+{
+	/* Decoding an event may look at up to 6 bytes past its flags byte
+	 * before the loop re-checks its position, so the buffer carries this
+	 * much zeroed slack after the real data to keep those reads in bounds.
+	 */
+	enum { PSM_PATTERN_SLACK = 8 };
+
+	int n, offset, psize, rows, chans, row, flags, channel;
+
+	unsigned char * buffer, * ptr, * end;
+
+	IT_ENTRY * entry;
+
+	if (size <= 0) goto error;
+
+	buffer = malloc((size_t)size + PSM_PATTERN_SLACK);
+	if (!buffer) goto error;
+	memset(buffer + size, 0, PSM_PATTERN_SLACK);
+
+	if (dumbfile_getnc((char *)buffer, size, f) < size) goto error_fb;
+
+	offset = 0;
+
+	for (n = 0; n < num; n++) {
+		IT_PATTERN * p = &pattern[n];
+
+		/* The whole 4-byte header must lie inside the data. */
+		if (offset < 0 || offset > size - 4) goto error_fb;
+
+		ptr = buffer + offset;
+		psize = ptr[0] | (ptr[1] << 8);
+		rows = ptr[2];
+		chans = ptr[3];
+
+		if (!rows || !chans) {
+			/* NOTE(review): offset is not advanced for an empty
+			 * pattern, so the next pattern is parsed from this same
+			 * header - confirm this matches the file format.
+			 */
+			p->n_rows = 1;
+			p->n_entries = 0;
+			continue;
+		}
+
+		psize = (psize + 15) & ~15;
+
+		/* Never let a pattern claim bytes beyond what was read. */
+		if (psize > size - offset)
+			end = buffer + size;
+		else
+			end = ptr + psize;
+		ptr += 4;
+
+		p->n_rows = rows;
+		/* First pass: count entries, one end-of-row marker per row plus
+		 * one per event. It must consume exactly the bytes the decoding
+		 * pass below consumes.
+		 */
+		p->n_entries = rows;
+		row = 0;
+
+		while ((row < rows) && (ptr < end)) {
+			flags = *ptr++;
+			if (!flags) {
+				row++;
+				continue;
+			}
+			if (flags & 0xE0) {
+				p->n_entries++;
+				if (flags & 0x80) ptr += 2;
+				if (flags & 0x40) ptr++;
+				if (flags & 0x20) {
+					/* Test the effect command byte itself: effect 40
+					 * occupies 4 bytes, every other effect 2, exactly
+					 * as in the decoding pass.
+					 */
+					if (*ptr == 40) ptr += 4;
+					else ptr += 2;
+				}
+			}
+		}
+
+		entry = malloc(p->n_entries * sizeof(*p->entry));
+		if (!entry) goto error_fb;
+
+		p->entry = entry;
+
+		/* Second pass: decode the events into the entry array. */
+		ptr = buffer + offset + 4;
+		row = 0;
+
+		while ((row < rows) && (ptr < end)) {
+			flags = *ptr++;
+			if (!flags) {
+				IT_SET_END_ROW(entry);
+				entry++;
+				row++;
+				continue;
+			}
+			if (flags & 0xE0) {
+				entry->mask = 0;
+				/* Channels at or beyond the pattern's own channel count
+				 * are accepted as they are.
+				 */
+				entry->channel = channel = flags & 0x1F;
+				if (flags & 0x80) {
+					if ((*ptr < 60) && (channel < pchans)) {
+						entry->mask |= IT_ENTRY_NOTE;
+						entry->note = *ptr + 35;
+					}
+					ptr++;
+					if (*ptr) {
+						entry->mask |= IT_ENTRY_INSTRUMENT;
+						entry->instrument = *ptr;
+					}
+					ptr++;
+				}
+				if (flags & 0x40) {
+					if (*ptr <= 64) {
+						entry->mask |= IT_ENTRY_VOLPAN;
+						entry->volpan = *ptr;
+					}
+					ptr++;
+				}
+				if (flags & 0x20) {
+					entry->mask |= IT_ENTRY_EFFECT;
+
+					switch (*ptr) {
+						case 1:
+							entry->effect = IT_XM_FINE_VOLSLIDE_UP;
+							entry->effectvalue = ptr[1];
+							break;
+
+						case 2:
+							entry->effect = IT_VOLUME_SLIDE;
+							entry->effectvalue = (ptr[1] << 4) & 0xF0;
+							break;
+
+						case 3:
+							entry->effect = IT_XM_FINE_VOLSLIDE_DOWN;
+							entry->effectvalue = ptr[1];
+							break;
+
+						case 4:
+							entry->effect = IT_VOLUME_SLIDE;
+							entry->effectvalue = ptr[1] & 0xF;
+							break;
+
+						case 10:
+							entry->effect = IT_PORTAMENTO_UP;
+							entry->effectvalue = EFFECT_VALUE(0xF, ptr[1]);
+							break;
+
+						case 11:
+							entry->effect = IT_PORTAMENTO_UP;
+							entry->effectvalue = ptr[1];
+							break;
+
+						case 12:
+							entry->effect = IT_PORTAMENTO_DOWN;
+							entry->effectvalue = EFFECT_VALUE(ptr[1], 0xF);
+							break;
+
+						case 13:
+							entry->effect = IT_PORTAMENTO_DOWN;
+							entry->effectvalue = ptr[1];
+							break;
+
+						case 14:
+							entry->effect = IT_TONE_PORTAMENTO;
+							entry->effectvalue = ptr[1];
+							break;
+
+						case 15:
+							entry->effect = IT_S;
+							entry->effectvalue = EFFECT_VALUE(IT_S_SET_GLISSANDO_CONTROL, ptr[1] & 15);
+							break;
+
+						case 16:
+							entry->effect = IT_VOLSLIDE_TONEPORTA;
+							entry->effectvalue = ptr[1] << 4;
+							break;
+
+						case 17:
+							entry->effect = IT_VOLSLIDE_TONEPORTA;
+							entry->effectvalue = ptr[1] & 0xF;
+							break;
+
+						case 20:
+							entry->effect = IT_VIBRATO;
+							entry->effectvalue = ptr[1];
+							break;
+
+						case 21:
+							entry->effect = IT_S;
+							entry->effectvalue = EFFECT_VALUE(IT_S_SET_VIBRATO_WAVEFORM, ptr[1] & 11);
+							break;
+
+						case 22:
+							entry->effect = IT_VOLSLIDE_VIBRATO;
+							entry->effectvalue = ptr[1] << 4;
+							break;
+
+						case 23:
+							entry->effect = IT_VOLSLIDE_VIBRATO;
+							entry->effectvalue = ptr[1] & 0xF;
+							break;
+
+						case 30:
+							entry->effect = IT_TREMOLO;
+							entry->effectvalue = ptr[1];
+							break;
+
+						case 31:
+							entry->effect = IT_S;
+							entry->effectvalue = EFFECT_VALUE(IT_S_SET_TREMOLO_WAVEFORM, ptr[1] & 11);
+							break;
+
+						case 40:
+							/* Four-byte effect: only the byte at ptr[2] is
+							 * kept; the extra two bytes are skipped here.
+							 */
+							entry->effect = IT_SET_SAMPLE_OFFSET;
+							entry->effectvalue = ptr[2];
+							ptr += 2;
+							break;
+
+						case 41:
+							entry->effect = IT_XM_RETRIGGER_NOTE;
+							entry->effectvalue = ptr[1];
+							break;
+
+						case 42:
+							entry->effect = IT_S;
+							entry->effectvalue = EFFECT_VALUE(IT_S_DELAYED_NOTE_CUT, ptr[1] & 0xF);
+							break;
+
+						case 43:
+							entry->effect = IT_S;
+							entry->effectvalue = EFFECT_VALUE(IT_S_NOTE_DELAY, ptr[1] & 0xF);
+							break;
+
+						case 50:
+							entry->effect = IT_JUMP_TO_ORDER;
+							entry->effectvalue = ptr[1];
+							break;
+
+						case 51:
+							entry->effect = IT_BREAK_TO_ROW;
+							entry->effectvalue = ptr[1];
+							break;
+
+						case 52:
+							entry->effect = IT_S;
+							entry->effectvalue = EFFECT_VALUE(IT_S_PATTERN_LOOP, ptr[1] & 0xF);
+							break;
+
+						case 53:
+							entry->effect = IT_S;
+							entry->effectvalue = EFFECT_VALUE(IT_S_PATTERN_DELAY, ptr[1] & 0xF);
+							break;
+
+						case 60:
+							entry->effect = IT_SET_SPEED;
+							entry->effectvalue = ptr[1];
+							break;
+
+						case 61:
+							entry->effect = IT_SET_SONG_TEMPO;
+							entry->effectvalue = ptr[1];
+							break;
+
+						case 70:
+							entry->effect = IT_ARPEGGIO;
+							entry->effectvalue = ptr[1];
+							break;
+
+						case 71:
+							entry->effect = IT_S;
+							entry->effectvalue = EFFECT_VALUE(IT_S_FINETUNE, ptr[1] & 0xF);
+							break;
+
+						case 72:
+							/* "balance" ... panning? */
+							entry->effect = IT_SET_PANNING;
+							entry->effectvalue = ((ptr[1] - ((ptr[1] & 8) >> 3)) << 5) / 7;
+							break;
+
+						default:
+							entry->mask &= ~IT_ENTRY_EFFECT;
+					}
+
+					ptr += 2;
+				}
+				/* An event that produced nothing is overwritten by the next. */
+				if (entry->mask) entry++;
+			}
+		}
+
+		/* Record how many entries were actually written. */
+		p->n_entries = (int)(entry - p->entry);
+		offset += psize;
+	}
+
+	free(buffer);
+
+	return 0;
+
+error_fb:
+	free(buffer);
+error:
+	return -1;
+}
+
+/* Kinds of file section an old-format PSM header points at. Each value is
+ * the index of that section's offset in the header's table of five
+ * offsets, which the loader reads in this order.
+ */
+#define PSM_COMPONENT_ORDERS            0
+#define PSM_COMPONENT_PANPOS            1
+#define PSM_COMPONENT_PATTERNS          2
+#define PSM_COMPONENT_SAMPLE_HEADERS    3
+#define PSM_COMPONENT_COMMENTS          4
+
+/* One section present in the file. The loader collects these and sorts
+ * them by offset before visiting each one.
+ */
+typedef struct PSM_COMPONENT
+{
+	unsigned char type; /* one of the PSM_COMPONENT_* values */
+	int32 offset;       /* offset as read from the header; passed to dumbfile_seek() with DFS_SEEK_SET */
+}
+PSM_COMPONENT;
+
+/* qsort() comparator ordering PSM_COMPONENT records by ascending offset.
+ *
+ * Returns a negative, zero or positive value as e1 sorts before, equal to
+ * or after e2.
+ */
+static int CDECL psm_component_compare(const void *e1, const void *e2)
+{
+	int32 a = ((const PSM_COMPONENT *)e1)->offset;
+	int32 b = ((const PSM_COMPONENT *)e2)->offset;
+
+	/* Offsets come straight from the file, so a - b could overflow and
+	 * report the wrong sign; compare the values instead.
+	 */
+	return (a > b) - (a < b);
+}
+
+/* Builds a DUMB_IT_SIGDATA from an old-format PSM module read from f.
+ *
+ * The header is: the 4-byte tag 'P','S','M',254; a 60-byte name ending in
+ * 0x1A; flags, version, pattern version, speed, tempo and mixing volume
+ * bytes; six 16-bit counts; five 32-bit section offsets (orders, pan
+ * positions, patterns, sample headers, comments; 0 = absent); and the
+ * total packed pattern size. The sections that are present are then
+ * visited in ascending offset order.
+ *
+ * Returns the new sigdata, or NULL if the header fails validation or any
+ * read, seek or allocation fails. Nothing is left allocated on failure.
+ */
+static DUMB_IT_SIGDATA *it_old_psm_load_sigdata(DUMBFILE *f)
+{
+	DUMB_IT_SIGDATA *sigdata;
+
+	PSM_COMPONENT *component;
+	int n_components = 0;
+
+	int n, flags, version, pver, n_orders, n_channels, total_pattern_size;
+
+	if (dumbfile_mgetl(f) != DUMB_ID('P','S','M',254)) goto error;
+
+	sigdata = malloc(sizeof(*sigdata));
+	if (!sigdata) goto error;
+
+	/* The name field must end in 0x1A, which is replaced by a terminator. */
+	if (dumbfile_getnc((char *)sigdata->name, 60, f) < 60 ||
+		sigdata->name[59] != 0x1A) goto error_sd;
+	sigdata->name[59] = 0;
+
+	flags = dumbfile_getc(f);
+	version = dumbfile_getc(f);
+	pver = dumbfile_getc(f);
+	sigdata->speed = dumbfile_getc(f);
+	sigdata->tempo = dumbfile_getc(f);
+	sigdata->mixing_volume = dumbfile_getc(f);
+	sigdata->n_orders = dumbfile_igetw(f);
+	n_orders = dumbfile_igetw(f);
+	sigdata->n_patterns = dumbfile_igetw(f);
+	sigdata->n_samples = dumbfile_igetw(f);
+	sigdata->n_pchannels = dumbfile_igetw(f);
+	n_channels = dumbfile_igetw(f);
+
+	/* Reject truncated headers and anything outside the supported ranges. */
+	if (dumbfile_error(f) ||
+		(flags & 1) ||
+		(version != 1 && version != 0x10) ||
+		(pver) ||
+		(sigdata->n_orders <= 0) ||
+		(sigdata->n_orders > 255) ||
+		(n_orders > 255) ||
+		(n_orders < sigdata->n_orders) ||
+		(sigdata->n_patterns > 255) ||
+		(sigdata->n_samples > 255) ||
+		(sigdata->n_pchannels > DUMB_IT_N_CHANNELS) ||
+		(sigdata->n_pchannels > n_channels) ||
+		(n_channels > DUMB_IT_N_CHANNELS))
+		goto error_sd;
+
+	sigdata->flags = IT_STEREO | IT_OLD_EFFECTS | IT_COMPATIBLE_GXX;
+
+	sigdata->global_volume = 128;
+	sigdata->pan_separation = 128;
+
+	/* From here on every owned pointer is either valid or NULL, so the
+	 * generic unloader can be used for cleanup.
+	 */
+	sigdata->song_message = NULL;
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->sample = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->n_instruments = 0;
+
+	sigdata->restart_position = 0;
+
+	sigdata->order = malloc(sigdata->n_orders);
+	if (!sigdata->order) goto error_usd;
+
+	if (sigdata->n_samples) {
+		sigdata->sample = malloc(sigdata->n_samples * sizeof(*sigdata->sample));
+		if (!sigdata->sample) goto error_usd;
+		for (n = 0; n < sigdata->n_samples; n++)
+			sigdata->sample[n].data = NULL;
+	}
+
+	if (sigdata->n_patterns) {
+		sigdata->pattern = malloc(sigdata->n_patterns * sizeof(*sigdata->pattern));
+		if (!sigdata->pattern) goto error_usd;
+		for (n = 0; n < sigdata->n_patterns; n++)
+			sigdata->pattern[n].entry = NULL;
+	}
+
+	component = malloc(5 * sizeof(*component));
+	if (!component) goto error_usd;
+
+	/* Keep only the sections whose offset is non-zero; the slot index
+	 * doubles as the PSM_COMPONENT_* type.
+	 */
+	for (n = 0; n < 5; n++) {
+		component[n_components].offset = dumbfile_igetl(f);
+		if (component[n_components].offset) {
+			component[n_components].type = n;
+			n_components++;
+		}
+	}
+
+	if (!n_components) goto error_fc;
+
+	/* A zero or negative size cannot describe real pattern data. */
+	total_pattern_size = dumbfile_igetl(f);
+	if (total_pattern_size <= 0) goto error_fc;
+
+	qsort(component, n_components, sizeof(PSM_COMPONENT), &psm_component_compare);
+
+	memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS);
+
+	/* Default pans, in groups of four channels: outer pair on one side,
+	 * inner pair on the other, spread by the configured separation.
+	 */
+	for (n = 0; n < DUMB_IT_N_CHANNELS; n += 4) {
+		int sep = 32 * dumb_it_default_panning_separation / 100;
+		sigdata->channel_pan[n  ] = 32 - sep;
+		sigdata->channel_pan[n+1] = 32 + sep;
+		sigdata->channel_pan[n+2] = 32 + sep;
+		sigdata->channel_pan[n+3] = 32 - sep;
+	}
+
+	for (n = 0; n < n_components; n++)
+	{
+		int o;
+
+		if ( dumbfile_seek(f, component[n].offset, DFS_SEEK_SET) ) goto error_fc;
+
+		switch (component[n].type) {
+
+			case PSM_COMPONENT_ORDERS:
+				if (dumbfile_getnc((char *)sigdata->order, sigdata->n_orders, f) < sigdata->n_orders) goto error_fc;
+				/* Skip the unused tail of the stored order list. */
+				if (n_orders > sigdata->n_orders)
+					if (dumbfile_skip(f, n_orders - sigdata->n_orders))
+						goto error_fc;
+				/* The 16-bit word after the list must read as zero. */
+				if (dumbfile_igetw(f)) goto error_fc;
+				break;
+
+			case PSM_COMPONENT_PANPOS:
+				if (dumbfile_getnc((char *)sigdata->channel_pan, sigdata->n_pchannels, f) < sigdata->n_pchannels) goto error_fc;
+				/* Rescale each stored pan value in place. */
+				for (o = 0; o < sigdata->n_pchannels; o++) {
+					sigdata->channel_pan[o] -= (sigdata->channel_pan[o] & 8) >> 3;
+					sigdata->channel_pan[o] = ((int)sigdata->channel_pan[o] << 5) / 7;
+				}
+				break;
+
+			case PSM_COMPONENT_PATTERNS:
+				if (it_old_psm_read_patterns(sigdata->pattern, f, sigdata->n_patterns, total_pattern_size, sigdata->n_pchannels)) goto error_fc;
+				break;
+
+			case PSM_COMPONENT_SAMPLE_HEADERS:
+				if (it_old_psm_read_samples(&sigdata->sample, f, &sigdata->n_samples)) goto error_fc;
+				break;
+
+			case PSM_COMPONENT_COMMENTS:
+				if (dumbfile_mgetl(f) == DUMB_ID('T','E','X','T')) {
+					o = dumbfile_igetw(f);
+					if (o > 0) {
+						sigdata->song_message = malloc(o + 1);
+						/* Fail cleanly instead of reading into NULL. */
+						if (!sigdata->song_message) goto error_fc;
+						if (dumbfile_getnc((char *)sigdata->song_message, o, f) < o) goto error_fc;
+						sigdata->song_message[o] = 0;
+					}
+				}
+				break;
+		}
+	}
+
+	_dumb_it_fix_invalid_orders(sigdata);
+
+	free(component);
+
+	return sigdata;
+
+	/* Cleanup ladder: each label releases one more layer than the next. */
+error_fc:
+	free(component);
+error_usd:
+	_dumb_it_unload_sigdata(sigdata);
+	return NULL;
+error_sd:
+	free(sigdata);
+error:
+	return NULL;
+}
+
+/* Loads an old-format PSM module from f and wraps it in a DUH tagged with
+ * "TITLE" (the module's name field) and "FORMAT" = "PSM (old)". Returns
+ * NULL if the module cannot be loaded.
+ */
+DUH *DUMBEXPORT dumb_read_old_psm_quick(DUMBFILE *f)
+{
+	DUH_SIGTYPE_DESC *desc = &_dumb_sigtype_it;
+	const char *tags[2][2];
+	sigdata_t *sigdata = it_old_psm_load_sigdata(f);
+
+	if (sigdata == NULL)
+		return NULL;
+
+	tags[0][0] = "TITLE";
+	tags[0][1] = (const char *)(((DUMB_IT_SIGDATA *)sigdata)->name);
+	tags[1][0] = "FORMAT";
+	tags[1][1] = "PSM (old)";
+
+	return make_duh(-1, 2, (const char *const (*)[2])tags, 1, &desc, &sigdata);
+}
diff --git a/libraries/dumb/src/it/readpsm.c b/libraries/dumb/src/it/readpsm.c
new file mode 100644
index 000000000..95545a528
--- /dev/null
+++ b/libraries/dumb/src/it/readpsm.c
@@ -0,0 +1,1292 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readpsm.c - Code to read a Protracker Studio       / / \  \
+ *             module from an open file.             | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Chris Moeller.                                  \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+#ifndef min
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#ifdef _MSC_VER
+#define snprintf sprintf_s
+#endif
+
+#define PSMV_OLD 940730
+#define PSMV_NEW 940902
+
+/* Record describing one chunk of a PSM file.
+ * NOTE(review): field meanings below are inferred from the names only; the
+ * code that fills this struct is not shown here - confirm against it.
+ */
+typedef struct _PSMCHUNK
+{
+	int id;               /* presumably the chunk's four-character tag */
+	int len;              /* presumably the number of bytes at data */
+	unsigned char * data; /* presumably the chunk payload; ownership to be confirmed */
+} PSMCHUNK;
+
+/* One event with up to eight bytes of parameters.
+ * NOTE(review): the code that produces and consumes these events is not
+ * shown here; `type` presumably holds one of the PSM_EVENT_* values below
+ * and the layout of `data` presumably depends on it - confirm.
+ */
+typedef struct _PSMEVENT
+{
+	int type;
+	unsigned char data[8];
+} PSMEVENT;
+
+/* Event type codes; the names suggest song-sequencing opcodes (end of
+ * list, play pattern, jump, speed/BPM, sample map, pan and volume changes).
+ */
+#define PSM_EVENT_END               0
+#define PSM_EVENT_PLAY_PATTERN      1
+#define PSM_EVENT_JUMP_TO_LINE      4
+#define PSM_EVENT_SET_SPEED         7
+#define PSM_EVENT_SET_BPM           8
+#define PSM_EVENT_SAMPLE_MAP_TABLE 12
+#define PSM_EVENT_CHANGE_PAN       13
+#define PSM_EVENT_CHANGE_VOL       14
+
+/* Fills in one IT_SAMPLE from a PSM sample chunk.
+ *
+ * sample:  destination; its data pointer is overwritten on success.
+ * data:    chunk payload - a 0x60-byte header followed by 8-bit
+ *          delta-encoded sample data.
+ * len:     number of bytes available at data.
+ * id:      sample number the header is required to carry.
+ * version: PSMV_OLD selects the older header layout; any other value is
+ *          treated as PSMV_NEW.
+ *
+ * Returns 0 on success, including a zero-length sample (which only has
+ * IT_SAMPLE_EXISTS cleared). Returns -1 if the chunk is shorter than the
+ * header, the sample number does not match id, the length is negative or
+ * larger than the data present, any of the low seven flag bits is set, or
+ * allocation fails.
+ */
+static int it_psm_process_sample(IT_SAMPLE * sample, const unsigned char * data, int len, int id, int version) {
+	int flags;
+	int insno = 0;
+	int length = 0;
+	int loopstart = 0;
+	int loopend = 0;
+	int defvol = 0;
+	int samplerate = 0;
+	int n;
+	unsigned char acc;
+	signed char * out;
+
+	if (len < 0x60) return -1;
+
+	flags = data[0];
+
+	/* The two layouts hold the same fields, 4 or 5 bytes apart. */
+	if (version == PSMV_OLD) {
+		memcpy(sample->name, data + 0x0D, 34);
+		sample->name[34] = 0;
+
+		insno = data[0x34] | (data[0x35] << 8);
+		length = data[0x36] | (data[0x37] << 8) | (data[0x38] << 16) | (data[0x39] << 24);
+		loopstart = data[0x3A] | (data[0x3B] << 8) | (data[0x3C] << 16) | (data[0x3D] << 24);
+		loopend = data[0x3E] | (data[0x3F] << 8) | (data[0x40] << 16) | (data[0x41] << 24);
+		defvol = data[0x44];
+		samplerate = data[0x49] | (data[0x4A] << 8) | (data[0x4B] << 16) | (data[0x4C] << 24);
+	} else /*if (version == PSMV_NEW)*/ {
+		memcpy(sample->name, data + 0x11, 34);
+		sample->name[34] = 0;
+
+		insno = data[0x38] | (data[0x39] << 8);
+		length = data[0x3A] | (data[0x3B] << 8) | (data[0x3C] << 16) | (data[0x3D] << 24);
+		loopstart = data[0x3E] | (data[0x3F] << 8) | (data[0x40] << 16) | (data[0x41] << 24);
+		loopend = data[0x42] | (data[0x43] << 8) | (data[0x44] << 16) | (data[0x45] << 24);
+		defvol = data[0x49];
+		samplerate = data[0x4E] | (data[0x4F] << 8) | (data[0x50] << 16) | (data[0x51] << 24);
+	}
+
+	if (insno != id) return -1;
+
+	if (!length) {
+		sample->flags &= ~IT_SAMPLE_EXISTS;
+		return 0;
+	}
+
+	/* A negative length must be rejected here: it would pass the upper
+	 * bound check, and the loop handling below could then replace it with
+	 * a loop end lying beyond the data actually present.
+	 */
+	if ((length < 0) || (length > len - 0x60) || ((flags & 0x7F) != 0)) return -1;
+
+	sample->flags = IT_SAMPLE_EXISTS;
+	sample->length = length;
+	sample->loop_start = loopstart;
+	sample->loop_end = loopend;
+	sample->C5_speed = samplerate;
+	sample->default_volume = defvol >> 1;
+	sample->default_pan = 0;
+	sample->filename[0] = 0;
+	sample->global_volume = 64;
+	sample->vibrato_speed = 0;
+	sample->vibrato_depth = 0;
+	sample->vibrato_rate = 0;
+	sample->vibrato_waveform = IT_VIBRATO_SINE;
+	sample->finetune = 0;
+	sample->max_resampling_quality = -1;
+
+	/* Flag bit 0x80 requests a loop; it is honoured only when the loop
+	 * points are valid, in which case the sample is cut at the loop end.
+	 */
+	if (flags & 0x80) {
+		if (((unsigned int)sample->loop_end <= (unsigned int)sample->length) &&
+			((unsigned int)sample->loop_start < (unsigned int)sample->loop_end)) {
+			sample->length = sample->loop_end;
+			sample->flags |= IT_SAMPLE_LOOP;
+		}
+	}
+
+	sample->data = malloc(sample->length);
+	if (!sample->data)
+		return -1;
+
+	/* Undo the delta encoding: each output byte is the running sum of the
+	 * input bytes, kept modulo 256 so the sum itself cannot overflow.
+	 */
+	out = (signed char *)sample->data;
+	acc = 0;
+	data += 0x60;
+
+	for (n = 0; n < sample->length; n++) {
+		acc = (unsigned char)(acc + *data++);
+		out[n] = (signed char)acc;
+	}
+
+	return 0;
+}
+
+static int it_psm_process_pattern(IT_PATTERN * pattern, const unsigned char * data, int len, int speed, int bpm, const unsigned char * pan, const int * vol, int version) {
+	int length, nrows, row, rowlen, pos;
+	unsigned flags, chan;
+	IT_ENTRY * entry;
+
+	length = data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
+	if (len > length) len = length;
+
+	if (version == PSMV_OLD) {
+		if (len < 10) return -1;
+		data += 8;
+		len -= 8;
+	} else /*if (version == PSMV_NEW)*/ {
+		if (len < 14) return -1;
+		data += 12;
+		len -= 12;
+	}
+
+	nrows = data[0] | (data[1] << 8);
+
+	if (!nrows) return 0;
+
+	pattern->n_rows = nrows;
+
+	data += 2;
+	len -= 2;
+
+	pattern->n_entries = 0;
+
+	row = 0;
+	pos = 2;
+	rowlen = data[0] | (data[1] << 8);
+
+	while ((row < nrows) && (pos < len)) {
+		if (pos >= rowlen) {
+			row++;
+			rowlen += data[pos] | (data[pos+1] << 8);
+			pos += 2;
+			continue;
+		}
+
+		flags = data[pos++];
+		chan = data[pos++];
+
+		if (chan > 63) return -1;
+
+		if (flags & 0xF0) {
+			pattern->n_entries++;
+			if (flags & 0x80) pos++;
+			if (flags & 0x40) pos++;
+			if (flags & 0x20) pos++;
+			if (flags & 0x10) {
+				switch (data[pos]) {
+					case 0x29:
+						pos++;
+					case 0x33:
+						pos++;
+					default:
+						pos += 2;
+				}
+			}
+		}
+	}
+
+	if (!pattern->n_entries) return 0;
+
+	pattern->n_entries += nrows;
+	if (speed) pattern->n_entries++;
+	if (bpm >= 0x20) pattern->n_entries++;
+
+	for (pos = 0; pos < 32; pos++) {
+		if (!(pan[pos*2+1] & 0xF9)) pattern->n_entries++;
+		if (vol[pos] != -1) pattern->n_entries++;
+	}
+
+	pattern->entry = malloc(pattern->n_entries * sizeof(*pattern->entry));
+	if (!pattern->entry) return -1;
+
+	entry = pattern->entry;
+
+	if (speed) {
+		entry->channel = 0;
+		entry->mask = IT_ENTRY_EFFECT;
+		entry->effect = IT_SET_SPEED;
+		entry->effectvalue = speed;
+		entry++;
+	}
+
+	if (bpm >= 0x20) {
+		entry->channel = 0;
+		entry->mask = IT_ENTRY_EFFECT;
+		entry->effect = IT_SET_SONG_TEMPO;
+		entry->effectvalue = bpm;
+		entry++;
+	}
+
+	for (pos = 0; pos < 32; pos++) {
+		if (!(pan[pos*2+1] & 0xF9)) {
+			entry->channel = pos;
+			entry->mask = IT_ENTRY_EFFECT;
+			switch (pan[pos*2+1]) {
+			case 0:
+				entry->effect = IT_SET_PANNING;
+				entry->effectvalue = pan[pos*2] ^ 128;
+				break;
+			case 2:
+				entry->effect = IT_S;
+				entry->effectvalue = EFFECT_VALUE(IT_S_SET_SURROUND_SOUND,1);
+				break;
+			case 4:
+				entry->effect = IT_SET_PANNING;
+				entry->effectvalue = 128;
+				break;
+			}
+			entry++;
+		}
+		if (vol[pos] != -1) {
+			entry->channel = pos;
+			entry->mask = IT_ENTRY_EFFECT;
+			entry->effect = IT_SET_CHANNEL_VOLUME;
+			entry->effectvalue = (vol[pos] + 2) >> 2;
+			entry++;
+		}
+	}
+
+	row = 0;
+	pos = 2;
+	rowlen = data[0] | (data[1] << 8);
+
+	while ((row < nrows) && (pos < len)) {
+		if (pos >= rowlen) {
+			IT_SET_END_ROW(entry);
+			entry++;
+			row++;
+			rowlen += data[pos] | (data[pos+1] << 8);
+			pos += 2;
+			continue;
+		}
+
+		flags = data[pos++];
+		entry->channel = data[pos++];
+		entry->mask = 0;
+
+		if (flags & 0xF0) {
+			if (flags & 0x80) {
+				entry->mask |= IT_ENTRY_NOTE;
+				if (version == PSMV_OLD) {
+					if ((data[pos] < 0x80)) entry->note = (data[pos]>>4)*12+(data[pos]&0x0f)+12;
+					else entry->mask &= ~IT_ENTRY_NOTE;
+				} else /*if (version == PSMV_NEW)*/ {
+					if ((data[pos]) && (data[pos] < 84)) entry->note = data[pos] + 35;
+					else entry->mask &= ~IT_ENTRY_NOTE;
+				}
+				pos++;
+			}
+
+			if (flags & 0x40) {
+				entry->mask |= IT_ENTRY_INSTRUMENT;
+				entry->instrument = data[pos++] + 1;
+			}
+
+			if (flags & 0x20) {
+				entry->mask |= IT_ENTRY_VOLPAN;
+				entry->volpan = (data[pos++] + 1) >> 1;
+			}
+
+			if (flags & 0x10) {
+				entry->mask |= IT_ENTRY_EFFECT;
+				length = data[pos+1];
+				switch (data[pos]) {
+					case 1:
+						entry->effect = IT_VOLUME_SLIDE;
+						if (version == PSMV_OLD) entry->effectvalue = ((length&0x1e)<<3) | 0xF;
+						else /*if (version == PSMV_NEW)*/ entry->effectvalue = (length<<4) | 0xF;
+						break;
+
+					case 2:
+						entry->effect = IT_VOLUME_SLIDE;
+						if (version == PSMV_OLD) entry->effectvalue = (length << 3) & 0xF0;
+						else /*if (version == PSMV_NEW)*/ entry->effectvalue = (length << 4) & 0xF0;
+						break;
+
+					case 3:
+						entry->effect = IT_VOLUME_SLIDE;
+						if (version == PSMV_OLD) entry->effectvalue = (length >> 1) | 0xF0;
+						else /*if (version == PSMV_NEW)*/ entry->effectvalue = length | 0xF0;
+						break;
+
+					case 4:
+						entry->effect = IT_VOLUME_SLIDE;
+						if (version == PSMV_OLD) entry->effectvalue = (length >> 1) & 0xF;
+						else /*if (version == PSMV_NEW)*/ entry->effectvalue = length & 0xF;
+						break;
+
+					case 12:
+						entry->effect = IT_PORTAMENTO_UP;
+						if (version == PSMV_OLD) {
+							if (length < 4) entry->effectvalue = length | 0xF0;
+							else entry->effectvalue = length >> 2;
+						} else /*if (version == PSMV_NEW)*/ {
+							entry->effectvalue = length;
+						}
+						break;
+
+					case 14:
+						entry->effect = IT_PORTAMENTO_DOWN;
+						if (version == PSMV_OLD) {
+							if (length < 4) entry->effectvalue = length | 0xF0;
+							else entry->effectvalue = length >> 2;
+						} else /*if (version == PSMV_NEW)*/ {
+							entry->effectvalue = length;
+						}
+						break;
+
+					case 15:
+						entry->effect = IT_TONE_PORTAMENTO;
+						if (version == PSMV_OLD) entry->effectvalue = length >> 2;
+						else /*if (version == PSMV_NEW)*/ entry->effectvalue = length;
+						break;
+
+					case 0x15:
+						entry->effect = IT_VIBRATO;
+						entry->effectvalue = length;
+						break;
+
+					case 0x18:
+						entry->effect = IT_VOLSLIDE_VIBRATO;
+						entry->effectvalue = length;
+						break;
+
+					case 0x29:
+						entry->effect = IT_SET_SAMPLE_OFFSET;
+						entry->effectvalue = data[pos+2];
+						pos += 2;
+						break;
+
+					case 0x2A:
+						entry->effect = IT_RETRIGGER_NOTE;
+						entry->effectvalue = length;
+						break;
+
+					case 0x33:
+#if 0
+						entry->effect = IT_POSITION_JUMP;
+						entry->effectvalue = data[pos+2];
+#else
+						entry->mask &= ~IT_ENTRY_EFFECT;
+#endif
+						pos++;
+						break;
+
+					case 0x34:
+						entry->effect = IT_BREAK_TO_ROW;
+						entry->effectvalue = length;
+						break;
+
+					case 0x3D:
+						entry->effect = IT_SET_SPEED;
+						entry->effectvalue = length;
+						break;
+
+					case 0x3E:
+						if (length >= 0x20) {
+							entry->effect = IT_SET_SONG_TEMPO;
+							entry->effectvalue = length;
+						} else {
+							entry->mask &= ~IT_ENTRY_EFFECT;
+						}
+						break;
+
+					case 0x47:
+						entry->effect = IT_ARPEGGIO;
+						entry->effectvalue = length;
+						break;
+
+					default:
+						return -1;
+				}
+				pos += 2;
+			}
+			if (entry->mask) entry++;
+		}
+	}
+
+	while (row < nrows) {
+		IT_SET_END_ROW(entry);
+		entry++;
+		row++;
+	}
+
+	pattern->n_entries = (int)(entry - pattern->entry);
+	if (!pattern->n_entries) return -1;
+
+	return 0;
+}
+
+
+/* Release the data buffer of each of the first `count` entries of `chunk`,
+ * then release the chunk array itself.
+ */
+static void free_chunks(PSMCHUNK * chunk, int count) {
+	int i = 0;
+
+	while (i < count) {
+		/* free(NULL) is a no-op, so entries without data need no guard. */
+		free(chunk[i].data);
+		i++;
+	}
+
+	free(chunk);
+}
+
+static void dumb_it_optimize_orders(DUMB_IT_SIGDATA * sigdata);
+
+static int pattcmp( const unsigned char *, const unsigned char *, size_t );
+
+/* Parse a PSM module (the "PSM " / "FILE" chunk container) from f and build a
+ * DUMB_IT_SIGDATA for one of its subsongs.
+ *
+ * f        - stream positioned at the start of the file.
+ * ver      - receives the numeric value of the selected song's 6-digit DATE
+ *            sub-chunk, or 0 when there is none or it is not all digits. It
+ *            is only written once the song's sub-chunks have been indexed.
+ * subsong  - zero-based index among the file's SONG chunks.
+ *
+ * Returns the new sigdata, or NULL on any read, allocation or format error.
+ * Every intermediate allocation is released before returning NULL.
+ */
+static DUMB_IT_SIGDATA *it_psm_load_sigdata(DUMBFILE *f, int * ver, int subsong)
+{
+	/* Capacities of the two fixed-size chunk tables allocated below. */
+	enum { PSM_MAX_CHUNKS = 768, PSM_MAX_SONG_CHUNKS = 128 };
+
+	DUMB_IT_SIGDATA *sigdata;
+
+	PSMCHUNK *chunk;
+	int n_chunks = 0;
+
+	PSMCHUNK *songchunk;
+	int n_song_chunks = 0;
+
+	PSMEVENT *event = NULL;
+	int n_events = 0;
+
+	unsigned char * ptr;
+
+	int n, length, o;
+
+	int found;
+
+	int n_patterns = 0;
+
+	int first_pattern_line = -1;
+	int first_pattern;
+
+	int speed, bpm;
+	unsigned char pan[64];
+	int vol[32];
+
+	if (dumbfile_mgetl(f) != DUMB_ID('P','S','M',' ')) goto error;
+
+	length = dumbfile_igetl(f);
+
+	if (dumbfile_mgetl(f) != DUMB_ID('F','I','L','E')) goto error;
+
+	chunk = calloc(PSM_MAX_CHUNKS, sizeof(*chunk));
+	if (!chunk) goto error;
+
+	/* Pass 1: pull every top-level chunk into memory. */
+	while (length >= 8) {
+		/* The table is fixed-size; refuse files that would overflow it. */
+		if (n_chunks >= PSM_MAX_CHUNKS) goto error_fc;
+		chunk[n_chunks].id = dumbfile_mgetl(f);
+		n = dumbfile_igetl(f);
+		length -= 8;
+		if (n < 0 || n > length)
+			goto error_fc;
+		chunk[n_chunks].len = n;
+		if (n) {
+			ptr = malloc(n);
+			if (!ptr) goto error_fc;
+			if (dumbfile_getnc((char *)ptr, n, f) < n)
+			{
+				free(ptr);
+				goto error_fc;
+			}
+			chunk[n_chunks].data = ptr;
+		}
+		n_chunks++;
+		length -= n;
+	}
+
+	if (!n_chunks) goto error_fc;
+
+	sigdata = malloc(sizeof(*sigdata));
+	if (!sigdata) goto error_fc;
+
+	sigdata->n_patterns = 0;
+	sigdata->n_samples = 0;
+	sigdata->name[0] = 0;
+
+	/* Bit 0 of found: SDFT seen; bit 1: at least one SONG seen. */
+	found = 0;
+
+	for (n = 0; n < n_chunks; n++) {
+		PSMCHUNK * c = &chunk[n];
+		switch(c->id) {
+		case DUMB_ID('S','D','F','T'):
+			/* song data format? */
+			if ((found & 1) || (c->len != 8) || memcmp(c->data, "MAINSONG", 8)) goto error_sd;
+			found |= 1;
+			break;
+
+		case DUMB_ID('S','O','N','G'):
+			if (/*(found & 2) ||*/ (c->len < 11) /*|| memcmp(c->data, "MAINSONG", 8)*/) goto error_sd;
+			found |= 2;
+			break;
+
+		case DUMB_ID('D','S','M','P'):
+			sigdata->n_samples++;
+			break;
+
+		case DUMB_ID('T','I','T','L'):
+			length = min((int)sizeof(sigdata->name) - 1, c->len);
+			memcpy(sigdata->name, c->data, length);
+			sigdata->name[length] = 0;
+			break;
+		}
+	}
+
+	if (found != 3 || !sigdata->n_samples) goto error_sd;
+
+	/* From here on the sigdata is in a state _dumb_it_unload_sigdata() is
+	 * handed on the error paths below.
+	 */
+	sigdata->song_message = NULL;
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->sample = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->n_instruments = 0;
+	sigdata->n_orders = 0;
+
+	/* Only JUMP_TO_LINE events assign this below; give it a defined value
+	 * for songs that contain none.
+	 */
+	sigdata->restart_position = 0;
+
+	/* Locate the SONG chunk belonging to the requested subsong. */
+	for (n = 0; n < n_chunks; n++) {
+		PSMCHUNK * c = &chunk[n];
+		if (c->id == DUMB_ID('S','O','N','G')) {
+			if (subsong == 0) break;
+			subsong--;
+		}
+	}
+
+	/* No such subsong: release sigdata and the chunks instead of leaking them. */
+	if (n == n_chunks) goto error_usd;
+	subsong = n;
+
+	/*for (n = 0; n < n_chunks; n++) {
+		PSMCHUNK * c = &chunk[n];
+		if (c->id == DUMB_ID('S','O','N','G')) {*/
+	{
+		PSMCHUNK * c = &chunk[subsong];
+		{
+			ptr = c->data;
+			if (ptr[10] > 32) goto error_usd;
+			sigdata->n_pchannels = ptr[10];
+			length = c->len - 11;
+			ptr += 11;
+			songchunk = 0;
+			if (length >= 8) {
+				songchunk = malloc(PSM_MAX_SONG_CHUNKS * sizeof(*songchunk));
+				if (!songchunk) goto error_usd;
+				/* Index the sub-chunks in place; their data pointers alias
+				 * the SONG chunk's buffer and are not owned by songchunk.
+				 */
+				while (length >= 8) {
+					unsigned int sublen;
+
+					if (n_song_chunks >= PSM_MAX_SONG_CHUNKS) goto error_sc;
+					songchunk[n_song_chunks].id = DUMB_ID(ptr[0], ptr[1], ptr[2], ptr[3]);
+					/* Assemble the length unsigned so a set top bit cannot
+					 * turn into a negative value that passes the range check.
+					 */
+					sublen = (unsigned int)ptr[4] | ((unsigned int)ptr[5] << 8) |
+						((unsigned int)ptr[6] << 16) | ((unsigned int)ptr[7] << 24);
+					length -= 8;
+					if (sublen > (unsigned int)length) goto error_sc;
+					n = (int)sublen;
+					songchunk[n_song_chunks].len = n;
+					songchunk[n_song_chunks].data = ptr + 8;
+					n_song_chunks++;
+					length -= n;
+					ptr += 8 + n;
+				}
+			}
+			/*break;*/
+		}
+	}
+
+	if (!n_song_chunks) goto error_sc;
+
+	/* found is reused: it now accumulates the decimal value of DATE. */
+	found = 0;
+
+	for (n = 0; n < n_song_chunks; n++) {
+		PSMCHUNK * c = &songchunk[n];
+
+		if (c->id == DUMB_ID('D','A','T','E')) {
+			/* date of the library build / format spec */
+			if (c->len == 6) {
+				length = c->len;
+				ptr = c->data;
+				while (length > 0) {
+					if (*ptr >= '0' && *ptr <= '9') {
+						found = (found * 10) + (*ptr - '0');
+					} else {
+						found = 0;
+						break;
+					}
+					ptr++;
+					length--;
+				}
+			}
+			break;
+		}
+	}
+
+	/*
+	if (found != 940506 &&
+		found != 940509 &&
+		found != 940510 &&
+		found != 940530 &&
+		found != 940629 &&
+		found != PSMV_OLD &&
+		found != 941011 &&
+		found != PSMV_NEW &&
+		found != 940906 &&
+		found != 940903 &&
+		found != 940914 &&
+		found != 941213 &&
+		found != 800211)   // WTF?
+		goto error_sc;
+	*/
+
+	*ver = found;
+
+	/* Collapse the date into one of the two layouts the parsers understand. */
+	if (found == 800211 ||
+		found == PSMV_NEW ||
+		found == 940903 ||
+		found == 940906 ||
+		found == 940914 ||
+		found == 941213) found = PSMV_NEW;
+	else found = PSMV_OLD;
+
+	memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS);
+
+	for (n = 0; n < DUMB_IT_N_CHANNELS; n += 4) {
+		int sep = 32 * dumb_it_default_panning_separation / 100;
+		sigdata->channel_pan[n  ] = 32 - sep;
+		sigdata->channel_pan[n+1] = 32 + sep;
+		sigdata->channel_pan[n+2] = 32 + sep;
+		sigdata->channel_pan[n+3] = 32 - sep;
+	}
+
+	for (n = 0; n < n_song_chunks; n++) {
+		PSMCHUNK * c = &songchunk[n];
+
+		switch (c->id) {
+			case DUMB_ID('O','P','L','H'):
+				/* A later OPLH chunk replaces an earlier one. Without this
+				 * the first list would leak and the new allocation would be
+				 * read with its first n_events slots uninitialised.
+				 */
+				free(event);
+				event = NULL;
+				n_events = 0;
+				if (c->len < 2) goto error_sc;
+				ptr = c->data;
+				o = ptr[0] | (ptr[1] << 8);
+				if (!o) goto error_sc;
+				event = malloc(o * sizeof(*event));
+				if (!event) goto error_sc;
+				length = c->len - 2;
+				ptr += 2;
+				while ((length > 0) && (n_events < o)) {
+					event[n_events].type = *ptr;
+					switch (*ptr) {
+					case PSM_EVENT_END:
+						ptr++;
+						length--;
+						break;
+
+					case PSM_EVENT_PLAY_PATTERN:
+						if (found == PSMV_OLD) {
+							if (length < 5) goto error_ev;
+							memcpy(event[n_events].data, ptr + 1, 4);
+							ptr += 5;
+							length -= 5;
+						} else /*if (found == PSMV_NEW)*/ {
+							if (length < 9) goto error_ev;
+							memcpy(event[n_events].data, ptr + 1, 8);
+							ptr += 9;
+							length -= 9;
+						}
+						break;
+
+					case PSM_EVENT_SET_SPEED:
+					case PSM_EVENT_SET_BPM:
+						if (length < 2) goto error_ev;
+						event[n_events].data[0] = ptr[1];
+						ptr += 2;
+						length -= 2;
+						break;
+
+					case PSM_EVENT_JUMP_TO_LINE:
+					case PSM_EVENT_CHANGE_VOL:
+						if (length < 3) goto error_ev;
+						memcpy(event[n_events].data, ptr + 1, 2);
+						ptr += 3;
+						length -= 3;
+						break;
+
+					case PSM_EVENT_SAMPLE_MAP_TABLE:
+						if (length < 7) goto error_ev;
+						memcpy(event[n_events].data, ptr + 1, 6);
+						ptr += 7;
+						length -= 7;
+						break;
+
+					case PSM_EVENT_CHANGE_PAN:
+						if (length < 4) goto error_ev;
+						memcpy(event[n_events].data, ptr + 1, 3);
+						ptr += 4;
+						length -= 4;
+						break;
+
+					default:
+						goto error_ev;
+					}
+					n_events++;
+				}
+				break;
+
+			case DUMB_ID('P','P','A','N'):
+				length = c->len;
+				if (length & 1) goto error_ev;
+				ptr = c->data;
+				o = 0;
+				while (length > 0) {
+					switch (ptr[0]) {
+					case 0:
+						sigdata->channel_pan[o] = ((((int)(signed char)ptr[1]) * 32) / 127) + 32;
+						break;
+					case 2:
+						sigdata->channel_pan[o] = IT_SURROUND;
+						break;
+					case 4:
+						sigdata->channel_pan[o] = 32;
+						break;
+					}
+					ptr += 2;
+					length -= 2;
+					if (++o >= DUMB_IT_N_CHANNELS) break;
+				}
+				break;
+
+			/*
+			case DUMB_ID('P','A','T','T'):
+			case DUMB_ID('D','S','A','M'):
+			*/
+		}
+	}
+
+	sigdata->flags = IT_STEREO | IT_OLD_EFFECTS | IT_COMPATIBLE_GXX;
+
+	sigdata->global_volume = 128;
+	sigdata->speed = 6;
+	sigdata->tempo = 125;
+	sigdata->mixing_volume = 48;
+	sigdata->pan_separation = 128;
+
+	/* Pending speed/tempo/pan/volume changes, folded into the next pattern
+	 * that gets played. 255 / -1 mean "no change pending".
+	 */
+	speed = 0;
+	bpm = 0;
+	memset(pan, 255, sizeof(pan));
+	memset(vol, 255, sizeof(vol));
+
+	/* At most one pattern per event; trimmed to n_patterns further down. */
+	sigdata->n_patterns = n_events;
+	sigdata->pattern = malloc(sigdata->n_patterns * sizeof(*sigdata->pattern));
+	if (!sigdata->pattern) goto error_ev;
+	for (n = 0; n < sigdata->n_patterns; n++)
+		sigdata->pattern[n].entry = NULL;
+
+	/* Walk the event list, turning each PLAY_PATTERN into one IT pattern. */
+	for (n = 0; n < n_events; n++) {
+		PSMEVENT * e = &event[n];
+		switch (e->type) {
+		case PSM_EVENT_END:
+			n = n_events;
+			break;
+
+		case PSM_EVENT_PLAY_PATTERN:
+			for (o = 0; o < n_chunks; o++) {
+				PSMCHUNK * c = &chunk[o];
+				if (c->id == DUMB_ID('P','B','O','D')) {
+					ptr = c->data;
+					length = c->len;
+					if (found == PSMV_OLD) {
+						if (length < 8) goto error_ev;
+						if (!pattcmp(ptr + 4, e->data, 4)) {
+							if (it_psm_process_pattern(&sigdata->pattern[n_patterns], ptr, length, speed, bpm, pan, vol, found)) goto error_ev;
+							if (first_pattern_line < 0) {
+								first_pattern_line = n;
+								first_pattern = o;
+							}
+							/* Remember which IT pattern this event became. */
+							e->data[0] = n_patterns;
+							e->data[1] = n_patterns >> 8;
+							n_patterns++;
+							break;
+						}
+					} else /*if (found == PSMV_NEW)*/ {
+						if (length < 12) goto error_ev;
+						if (!pattcmp(ptr + 4, e->data, 8)) {
+							if (it_psm_process_pattern(&sigdata->pattern[n_patterns], ptr, length, speed, bpm, pan, vol, found)) goto error_ev;
+							if (first_pattern_line < 0) {
+								first_pattern_line = n;
+								first_pattern = o;
+							}
+							/* Remember which IT pattern this event became. */
+							e->data[0] = n_patterns;
+							e->data[1] = n_patterns >> 8;
+							n_patterns++;
+							break;
+						}
+					}
+				}
+			}
+			if (o == n_chunks) goto error_ev;
+
+			speed = 0;
+			bpm = 0;
+			memset(pan, 255, sizeof(pan));
+			memset(vol, 255, sizeof(vol));
+
+			e->type = PSM_EVENT_END;
+			break;
+
+		case PSM_EVENT_JUMP_TO_LINE:
+			o = e->data[0] | (e->data[1] << 8);
+			if (o >= n_events) goto error_ev;
+			if (o == 0) {
+				/* whew! easy case! */
+				sigdata->restart_position = 0;
+				n = n_events;
+			} else if (o == n) {
+				/* freeze */
+				n = n_events;
+			} else if (o > n) {
+				/* jump ahead, setting played event numbers to zero will prevent endless looping */
+				n = o - 1;
+			} else if (o >= first_pattern_line) {
+				/* another semi-easy case */
+				sigdata->restart_position = event[o].data[0] | (event[o].data[1] << 8);
+				n = n_events;
+			} else {
+				/* crud, try to simulate rerunning all of the commands from the indicated
+				 * line up to the first pattern, then dupe the first pattern again.
+				 */
+				/*
+				PSMCHUNK * c = &chunk[first_pattern];
+
+				for (; o < first_pattern_line; o++) {
+					PSMEVENT * ev = &event[o];
+					switch (ev->type) {
+					case PSM_EVENT_SET_SPEED:
+						speed = ev->data[0];
+						break;
+					case PSM_EVENT_SET_BPM:
+						bpm = ev->data[0];
+						break;
+					case PSM_EVENT_CHANGE_PAN:
+						if (ev->data[0] > 31) goto error_ev;
+						pan[ev->data[0] * 2] = ev->data[1];
+						pan[ev->data[0] * 2 + 1] = ev->data[2];
+						break;
+					case PSM_EVENT_CHANGE_VOL:
+						if (ev->data[0] > 31) goto error_ev;
+						vol[ev->data[0]] = ev->data[1];
+						break;
+					}
+				}
+
+				if (it_psm_process_pattern(&sigdata->pattern[n_patterns], c->data, c->len, speed, bpm, pan, vol, found)) goto error_ev;
+				n_patterns++;
+				sigdata->restart_position = 1;
+				n = n_events;
+
+				Eh, what the hell? PSM has no panning commands anyway.
+				*/
+				sigdata->restart_position = 0;
+				n = n_events;
+			}
+			e->type = PSM_EVENT_END;
+			break;
+
+		case PSM_EVENT_SET_SPEED:
+			speed = e->data[0];
+			break;
+
+		case PSM_EVENT_SET_BPM:
+			bpm = e->data[0];
+			break;
+
+		case PSM_EVENT_CHANGE_PAN:
+			o = e->data[0];
+			if (o > 31) goto error_ev;
+			pan[o * 2] = e->data[1];
+			pan[o * 2 + 1] = e->data[2];
+			break;
+
+		case PSM_EVENT_CHANGE_VOL:
+			o = e->data[0];
+			if (o > 31) goto error_ev;
+			vol[o] = e->data[1];
+			break;
+
+		case PSM_EVENT_SAMPLE_MAP_TABLE:
+			if (e->data[0] != 0 || e->data[1] != 0xFF ||
+				e->data[2] != 0 || e->data[3] != 0 ||
+				e->data[4] != 1 || e->data[5] != 0)
+				goto error_ev;
+			break;
+		}
+	}
+
+	/* The order list stores pattern numbers in single bytes. */
+	if (n_patterns > 256) goto error_ev;
+
+	sigdata->sample = malloc(sigdata->n_samples * sizeof(*sigdata->sample));
+	if (!sigdata->sample) goto error_ev;
+	for (n = 0; n < sigdata->n_samples; n++) {
+		sigdata->sample[n].data = NULL;
+		sigdata->sample[n].flags = 0;
+	}
+
+	o = 0;
+	for (n = 0; n < n_chunks; n++) {
+		PSMCHUNK * c = &chunk[n];
+		if (c->id == DUMB_ID('D','S','M','P')) {
+			if (it_psm_process_sample(&sigdata->sample[o], c->data, c->len, o, found)) goto error_ev;
+			o++;
+		}
+	}
+
+	sigdata->n_orders = n_patterns;
+	sigdata->n_patterns = n_patterns;
+
+	/* Patterns were emitted in play order, so the order list is 0..n-1. */
+	sigdata->order = malloc(n_patterns);
+	/* malloc(0) may return NULL; only a failed non-empty request is an error. */
+	if (!sigdata->order && n_patterns) goto error_ev;
+
+	for (n = 0; n < n_patterns; n++) {
+		sigdata->order[n] = n;
+	}
+
+	free(event);
+	free(songchunk);
+	free_chunks(chunk, n_chunks);
+
+	_dumb_it_fix_invalid_orders(sigdata);
+
+	dumb_it_optimize_orders(sigdata);
+
+	return sigdata;
+
+	/* Cleanup ladder: each label releases one more resource than the next. */
+error_ev:
+	free(event);
+error_sc:
+	free(songchunk);
+error_usd:
+	_dumb_it_unload_sigdata(sigdata);
+	goto error_fc;
+error_sd:
+	free(sigdata);
+error_fc:
+	free_chunks(chunk, n_chunks);
+error:
+	return NULL;
+}
+
+/* qsort() comparator that orders single-byte pattern numbers ascending.
+ *
+ * The bytes are read as unsigned char: the array being sorted in
+ * dumb_it_optimize_orders() is unsigned char, and the signedness of plain
+ * char is implementation-defined, which made the resulting order differ
+ * between platforms for values above 127.
+ *
+ * Returns -1, 0 or 1.
+ */
+static int CDECL it_order_compare(const void *e1, const void *e2) {
+	const unsigned char a = *(const unsigned char *)e1;
+	const unsigned char b = *(const unsigned char *)e2;
+
+	return (a > b) - (a < b);
+}
+
+/*
+static int it_optimize_compare(const void *e1, const void *e2) {
+	if (((const IT_ENTRY *)e1)->channel < ((const IT_ENTRY *)e2)->channel)
+		return -1;
+
+	if (((const IT_ENTRY *)e1)->channel > ((const IT_ENTRY *)e2)->channel)
+		return 1;
+
+	return 0;
+}
+*/
+
+/* Equality test for two pattern entries: returns 1 when they match and 0
+ * otherwise. Two end-of-row markers always match. Apart from channel and
+ * mask, only the fields selected by the mask are compared.
+ */
+static int CDECL it_entry_compare(const IT_ENTRY * e1, const IT_ENTRY * e2) {
+	int fields;
+
+	if (IT_IS_END_ROW(e1) && IT_IS_END_ROW(e2)) return 1;
+
+	if (e1->channel != e2->channel || e1->mask != e2->mask) return 0;
+
+	/* Both masks are equal from here on, so either one selects the fields. */
+	fields = e1->mask;
+
+	if ((fields & IT_ENTRY_NOTE) && e1->note != e2->note)
+		return 0;
+	if ((fields & IT_ENTRY_INSTRUMENT) && e1->instrument != e2->instrument)
+		return 0;
+	if ((fields & IT_ENTRY_VOLPAN) && e1->volpan != e2->volpan)
+		return 0;
+	if (fields & IT_ENTRY_EFFECT) {
+		if (e1->effect != e2->effect) return 0;
+		if (e1->effectvalue != e2->effectvalue) return 0;
+	}
+
+	return 1;
+}
+
+/*
+static void dumb_it_optimize_pattern(IT_PATTERN * pattern) {
+	IT_ENTRY * entry, * end;
+	IT_ENTRY * rowstart, * rowend;
+	IT_ENTRY * current;
+
+	if (!pattern->n_entries || !pattern->entry) return;
+
+	current = entry = pattern->entry;
+	end = entry + pattern->n_entries;
+
+	while (entry < end) {
+		rowstart = entry;
+		while (!IT_IS_END_ROW(entry)) entry++;
+		rowend = entry;
+		if (rowend > rowstart + 1)
+			qsort(rowstart, rowend - rowstart, sizeof(IT_ENTRY), &it_optimize_compare);
+		entry = rowstart;
+		while (entry < rowend) {
+			if (!(entry->mask)) {}
+			else if (it_entry_compare(entry, current)) {}
+			else if (!(current->mask) ||
+					 ((entry->channel == current->channel) &&
+					 ((entry->mask | current->mask) == (entry->mask ^ current->mask)))) {
+				current->mask |= entry->mask;
+				if (entry->mask & IT_ENTRY_NOTE) current->note = entry->note;
+				if (entry->mask & IT_ENTRY_INSTRUMENT) current->instrument = entry->instrument;
+				if (entry->mask & IT_ENTRY_VOLPAN) current->volpan = entry->volpan;
+				if (entry->mask & IT_ENTRY_EFFECT) {
+					current->effect = entry->effect;
+					current->effectvalue = entry->effectvalue;
+				}
+			} else {
+				if (++current < entry) *current = *entry;
+			}
+			entry++;
+		}
+		if (++current < entry) *current = *entry;
+		entry++;
+	}
+
+	current++;
+
+	if (current < end) {
+		IT_ENTRY * opt;
+		pattern->n_entries = current - pattern->entry;
+		opt = realloc(pattern->entry, pattern->n_entries * sizeof(*pattern->entry));
+		if (opt) pattern->entry = opt;
+	}
+}
+*/
+
+/* Returns 1 when both patterns hold the same number of entries and every
+ * pair of corresponding entries matches according to it_entry_compare(),
+ * 0 otherwise. A pattern always matches itself.
+ */
+static int it_pattern_compare(const IT_PATTERN * p1, const IT_PATTERN * p2) {
+	int idx;
+
+	if (p1 == p2) return 1;
+	if (p1->n_entries != p2->n_entries) return 0;
+
+	for (idx = 0; idx < p1->n_entries; idx++) {
+		if (!it_entry_compare(&p1->entry[idx], &p2->entry[idx]))
+			return 0;
+	}
+
+	return 1;
+}
+
+/* Shrink the pattern table of sigdata: patterns with identical contents are
+ * merged, patterns no order refers to are freed, and the order list is
+ * renumbered to index the compacted table. Order values that are not valid
+ * pattern numbers are left untouched. If a working allocation fails the
+ * function returns early; any merging already applied to the order list
+ * still refers to valid patterns of the original table.
+ */
+static void dumb_it_optimize_orders(DUMB_IT_SIGDATA * sigdata) {
+	unsigned char * used;   /* distinct valid pattern numbers found in the order list */
+	IT_PATTERN * packed;    /* replacement pattern table */
+	int n_used = 0;
+	int i, j, k;
+
+	if (!sigdata->n_orders || !sigdata->n_patterns) return;
+
+	used = malloc(sigdata->n_orders);
+	if (!used) return;
+
+	/* Record each valid pattern number once, in order of first use. */
+	for (i = 0; i < sigdata->n_orders; i++) {
+		if (sigdata->order[i] >= sigdata->n_patterns) continue;
+		j = 0;
+		while (j < n_used && sigdata->order[i] != used[j]) j++;
+		if (j == n_used) used[n_used++] = sigdata->order[i];
+	}
+
+	if (!n_used) {
+		free(used);
+		return;
+	}
+
+	/* Wherever a later pattern has the same contents as an earlier one,
+	 * redirect both the order list and the working list to the earlier one.
+	 */
+	for (i = 0; i < n_used; i++) {
+		for (j = i + 1; j < n_used; j++) {
+			if (used[i] == used[j]) continue;
+			if (!it_pattern_compare(&sigdata->pattern[used[i]], &sigdata->pattern[used[j]])) continue;
+
+			for (k = 0; k < sigdata->n_orders; k++) {
+				if (sigdata->order[k] == used[j])
+					sigdata->order[k] = used[i];
+			}
+			for (k = j + 1; k < n_used; k++) {
+				if (used[k] == used[j])
+					used[k] = used[i];
+			}
+			used[j] = used[i];
+		}
+	}
+
+	/* Sort, then squeeze out the duplicates the merge step introduced. */
+	qsort(used, n_used, sizeof(*used), &it_order_compare);
+
+	j = 0;
+	for (i = 1; i < n_used; i++) {
+		if (used[i] != used[j])
+			used[++j] = used[i];
+	}
+	n_used = j + 1;
+
+	packed = malloc(n_used * sizeof(*packed));
+	if (!packed) {
+		free(used);
+		return;
+	}
+
+	/* Surviving patterns move into the new table by struct copy, so their
+	 * entry buffers change owner rather than being duplicated.
+	 */
+	for (i = 0; i < n_used; i++)
+		packed[i] = sigdata->pattern[used[i]];
+
+	/* Everything that did not survive loses its entry buffer. */
+	for (i = 0; i < sigdata->n_patterns; i++) {
+		j = 0;
+		while (j < n_used && used[j] != i) j++;
+		if (j == n_used)
+			free(sigdata->pattern[i].entry);
+	}
+
+	free(sigdata->pattern);
+	sigdata->pattern = packed;
+	sigdata->n_patterns = n_used;
+
+	/* Translate old pattern numbers in the order list to new indices. */
+	for (i = 0; i < sigdata->n_orders; i++) {
+		for (j = 0; j < n_used; j++) {
+			if (sigdata->order[i] == used[j]) {
+				sigdata->order[i] = j;
+				break;
+			}
+		}
+	}
+
+	free(used);
+}
+
+/* Count the SONG chunks in a PSM file read from f.
+ *
+ * Returns the number of subsongs, or 0 when the stream does not start with
+ * the "PSM " / "FILE" signature, a read error occurs, or a chunk declares a
+ * length that is negative or larger than what remains of the container.
+ * it_psm_load_sigdata() rejects the same lengths, so a file that cannot be
+ * loaded is not reported as having subsongs.
+ */
+int DUMBEXPORT dumb_get_psm_subsong_count(DUMBFILE *f) {
+	int length, subsongs;
+	int32 l;
+
+	if (dumbfile_mgetl(f) != DUMB_ID('P','S','M',' ')) return 0;
+
+	length = dumbfile_igetl(f);
+
+	if (dumbfile_mgetl(f) != DUMB_ID('F','I','L','E')) return 0;
+
+	subsongs = 0;
+
+	while (length >= 8 && !dumbfile_error(f)) {
+		if (dumbfile_mgetl(f) == DUMB_ID('S','O','N','G')) subsongs++;
+		l = dumbfile_igetl(f);
+		length -= 8;
+		/* Validate before skipping: a negative length must never reach
+		 * dumbfile_skip(), and checking against the remainder also avoids
+		 * overflowing the old `l + 8` computation.
+		 */
+		if (l < 0 || l > length) return 0;
+		dumbfile_skip(f, l);
+		length -= l;
+	}
+
+	if (dumbfile_error(f)) return 0;
+
+	return subsongs;
+}
+
+
+
+/* Compare two l-byte pattern identifiers, tolerating numbers that are
+ * formatted differently (e.g. "P012" and "P12 " are considered equal).
+ *
+ * Returns 0 when the fields are byte-identical, or when their first decimal
+ * digit sits at the same offset, the bytes before it are identical and the
+ * digit runs starting there have the same numeric value. Any other result
+ * is non-zero: negative or positive according to the first difference found
+ * (digit position, then prefix bytes, then numeric value).
+ *
+ * Neither field needs to be NUL-terminated; no byte at or beyond index l is
+ * read. A field without digits counts as having the number 0 at offset l.
+ */
+int pattcmp( const unsigned char * a, const unsigned char * b, size_t l )
+{
+	size_t i, j, pos;
+	unsigned long na = 0, nb = 0;
+	int k;
+
+	k = memcmp( a, b, l );
+	if ( !k ) return k;
+
+	/* Find the first digit of a, then parse the digit run inside the field. */
+	for ( i = 0; i < l; ++i )
+	{
+		if ( a [i] >= '0' && a [i] <= '9' ) break;
+	}
+
+	for ( pos = i; pos < l && a [pos] >= '0' && a [pos] <= '9'; ++pos )
+	{
+		na = na * 10 + (unsigned long)( a [pos] - '0' );
+	}
+
+	/* Same for b. */
+	for ( j = 0; j < l; ++j )
+	{
+		if ( b [j] >= '0' && b [j] <= '9' ) break;
+	}
+
+	for ( pos = j; pos < l && b [pos] >= '0' && b [pos] <= '9'; ++pos )
+	{
+		nb = nb * 10 + (unsigned long)( b [pos] - '0' );
+	}
+
+	/* Numbers that start at different offsets can never match. */
+	if ( i < j ) return -1;
+	else if ( i > j ) return 1;
+
+	/* Same offset: the text before the number has to agree as well. */
+	k = memcmp( a, b, j );
+	if ( k ) return k;
+
+	return ( na > nb ) - ( na < nb );
+}
+
+
+
+/* Load the given subsong of a PSM file from f and wrap it in a DUH.
+ *
+ * The DUH carries a TITLE tag (the module name) and a FORMAT tag ("PSM").
+ * When the loader reports a non-zero version number, a FORMATVERSION tag
+ * holding its decimal representation is added as well.
+ *
+ * Returns NULL when the module cannot be loaded; otherwise returns whatever
+ * make_duh() returns.
+ */
+DUH *DUMBEXPORT dumb_read_psm_quick(DUMBFILE *f, int subsong)
+{
+	DUH_SIGTYPE_DESC *descptr = &_dumb_sigtype_it;
+	const char *tag[3][2];
+	char version[16];
+	int n_tags;
+	int ver;
+	sigdata_t *sigdata = it_psm_load_sigdata(f, &ver, subsong);
+
+	if (!sigdata)
+		return NULL;
+
+	tag[0][0] = "TITLE";
+	tag[0][1] = (const char *)(((DUMB_IT_SIGDATA *)sigdata)->name);
+	tag[1][0] = "FORMAT";
+	tag[1][1] = "PSM";
+	n_tags = 2;
+
+	if ( ver )
+	{
+		/* Render the version number into the local buffer; it stays alive
+		 * until after the make_duh() call below.
+		 */
+#if NEED_ITOA
+		snprintf( version, 15, "%u", ver );
+		version[15] = 0;
+#else
+		itoa(ver, version, 10);
+#endif
+		tag[2][0] = "FORMATVERSION";
+		tag[2][1] = version;
+		n_tags = 3;
+	}
+
+	return make_duh(-1, n_tags, (const char *const (*)[2])tag, 1, &descptr, &sigdata);
+}
diff --git a/libraries/dumb/src/it/readptm.c b/libraries/dumb/src/it/readptm.c
new file mode 100644
index 000000000..3052a4daa
--- /dev/null
+++ b/libraries/dumb/src/it/readptm.c
@@ -0,0 +1,554 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readptm.c - Code to read a Poly Tracker v2.03      / / \  \
+ *             module from an open file.             | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Chris Moeller. Based on reads3m.c               \_  /  > /
+ * by entheh.                                           | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+// IT_STEREO... :o
+#include <stdlib.h>
+#include <string.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+
+
+/* Read one PTM sample header from f into sample.
+ *
+ * The file position of the sample's data is stored in *offset. When the low
+ * two bits of the type byte are zero the slot is treated as empty: the
+ * IT_SAMPLE_EXISTS bit is cleared and the remaining fields are not filled in.
+ *
+ * Returns the value of dumbfile_error(f) once the header has been consumed.
+ */
+static int it_ptm_read_sample_header(IT_SAMPLE *sample, int32 *offset, DUMBFILE *f)
+{
+	int type = dumbfile_getc(f);
+
+	dumbfile_getnc((char *)sample->filename, 12, f);
+	sample->filename[12] = 0;
+
+	sample->default_volume = dumbfile_getc(f);
+
+	sample->C5_speed = dumbfile_igetw(f) << 1;
+
+	/* segment */
+	dumbfile_skip(f, 2);
+
+	*offset = dumbfile_igetl(f);
+
+	sample->length = dumbfile_igetl(f);
+	sample->loop_start = dumbfile_igetl(f);
+	sample->loop_end = dumbfile_igetl(f);
+
+	/* GUSBegin, GUSLStart, GUSLEnd, GUSLoop, reserved */
+	dumbfile_skip(f, 4+4+4+1+1);
+
+	dumbfile_getnc((char *)sample->name, 28, f);
+	sample->name[28] = 0;
+
+	/* The four-byte sample ID ('PTMS') is skipped rather than verified,
+	 * because files exist in which it is broken or missing.
+	 */
+	dumbfile_skip(f, 4);
+
+	if (!(type & 3)) {
+		/* Looks like no sample */
+		sample->flags &= ~IT_SAMPLE_EXISTS;
+		return dumbfile_error(f);
+	}
+
+	sample->global_volume = 64;
+
+	sample->flags = IT_SAMPLE_EXISTS;
+	if (type & 4)
+		sample->flags |= IT_SAMPLE_LOOP;
+	if (type & 8)
+		sample->flags |= IT_SAMPLE_PINGPONG_LOOP;
+
+	if (type & 16) {
+		/* The three values read from the file are halved for 16-bit
+		 * samples.
+		 */
+		sample->flags |= IT_SAMPLE_16BIT;
+
+		sample->length >>= 1;
+		sample->loop_start >>= 1;
+		sample->loop_end >>= 1;
+	}
+
+	if (sample->loop_end)
+		sample->loop_end--;
+
+	sample->default_pan = 0; /* 0 = don't use, or 160 = centre? */
+
+	if (sample->length <= 0) {
+		sample->flags &= ~IT_SAMPLE_EXISTS;
+	} else if (sample->flags & IT_SAMPLE_LOOP) {
+		/* Drop loops that end past the sample or are empty/inverted;
+		 * otherwise nothing after the loop end is needed.
+		 */
+		if ((unsigned int)sample->loop_end > (unsigned int)sample->length ||
+		    (unsigned int)sample->loop_start >= (unsigned int)sample->loop_end)
+			sample->flags &= ~IT_SAMPLE_LOOP;
+		else
+			sample->length = sample->loop_end;
+	}
+
+	/* Do we need to set all these? */
+	sample->vibrato_speed = 0;
+	sample->vibrato_depth = 0;
+	sample->vibrato_rate = 0;
+	sample->vibrato_waveform = IT_VIBRATO_SINE;
+	sample->finetune = 0;
+	sample->max_resampling_quality = -1;
+
+	return dumbfile_error(f);
+}
+
+
+/* Read one byte from f; a negative result from dumbfile_getc() is
+ * returned as 0 instead.
+ */
+static int it_ptm_read_byte(DUMBFILE *f)
+{
+	int value = dumbfile_getc(f);
+
+	return value < 0 ? 0 : value;
+}
+
+/* Allocate sample->data and fill it with sample->length delta-decoded
+ * sample points read from f.
+ *
+ * Every byte of the stream is a signed delta added to a running sum. An
+ * 8-bit point is the sum after one delta; a 16-bit point is built from the
+ * low byte of the sum after one delta and the low byte of the sum after the
+ * next.
+ *
+ * Returns -1 if the allocation fails, or if the stream reports an error and
+ * `last` is zero; returns 0 otherwise.
+ */
+static int it_ptm_read_sample_data(IT_SAMPLE *sample, int last, DUMBFILE *f)
+{
+	const int wide = (sample->flags & IT_SAMPLE_16BIT) != 0;
+	int32 i;
+	int acc = 0;
+
+	sample->data = malloc(sample->length * (wide ? 2 : 1));
+	if (!sample->data)
+		return -1;
+
+	if (wide) {
+		short *dst = (short *)sample->data;
+		unsigned char lo, hi;
+
+		for (i = 0; i < sample->length; i++) {
+			acc += (signed char) it_ptm_read_byte(f);
+			lo = acc;
+			acc += (signed char) it_ptm_read_byte(f);
+			hi = acc;
+			dst[i] = lo | (hi << 8);
+		}
+	} else {
+		signed char *dst = (signed char *)sample->data;
+
+		for (i = 0; i < sample->length; i++) {
+			acc += (signed char) it_ptm_read_byte(f);
+			dst[i] = acc;
+		}
+	}
+
+	/* A read error is tolerated only when the caller passes non-zero `last`. */
+	if (dumbfile_error(f) && !last)
+		return -1;
+
+	return 0;
+}
+
+
+
+/* Read one packed PTM pattern from f and expand it into pattern->entry.
+ *
+ * pattern - receives n_rows (always 64 on success), n_entries and a newly
+ *           allocated entry array.
+ * f       - stream positioned at the start of the packed pattern.
+ * buffer  - scratch space for the packed bytes; the bounds checks below
+ *           assume it holds at least 65536 bytes.
+ * length  - packed size available for this pattern; once that many bytes
+ *           have been consumed, any rows still missing are filled with
+ *           empty rows.
+ *
+ * Each packed item starts with a control byte: the low five bits are the
+ * channel and the top three bits say which fields follow (32: note and
+ * instrument, 64: effect and value, 128: volume). A control byte with none
+ * of the top three bits set ends the row.
+ *
+ * Returns 0 on success, -1 on a zero length, read error, buffer exhaustion
+ * or allocation failure.
+ */
+static int it_ptm_read_pattern(IT_PATTERN *pattern, DUMBFILE *f, unsigned char *buffer, int length)
+{
+	/* Payload bytes following a control byte, indexed by its top three bits. */
+	static const unsigned char used[8] = {0, 2, 2, 4, 1, 3, 3, 5};
+
+	int buflen = 0;
+	int bufpos = 0;
+	int effect, effectvalue;
+
+	IT_ENTRY *entry;
+
+	if (!length)
+		return -1;
+
+	pattern->n_rows = 0;
+	pattern->n_entries = 0;
+
+	/* Pass 1: copy the packed data into buffer and count the entries needed
+	 * (one per control byte, end-of-row markers included).
+	 */
+	for (;;) {
+		unsigned char b = buffer[buflen++] = dumbfile_getc(f);
+
+		b >>= 5;
+		pattern->n_entries++;
+		if (b) {
+			if (buflen + used[b] >= 65536) return -1;
+			dumbfile_getnc((char *)buffer + buflen, used[b], f);
+			buflen += used[b];
+		} else {
+			/* End of row */
+			if (++pattern->n_rows == 64) break;
+			if (buflen >= 65536) return -1;
+		}
+
+		/* We have ensured that buflen < 65536 at this point, so it is safe
+		 * to iterate and read at least one more byte without checking.
+		 * However, now would be a good time to check for errors reading from
+		 * the file.
+		 */
+		if (dumbfile_error(f))
+			return -1;
+
+		/* Great. We ran out of data, but there should be data for more rows.
+		 * Fill the rest with null data...
+		 */
+		if (buflen >= length && pattern->n_rows < 64)
+		{
+			while (pattern->n_rows < 64)
+			{
+				if (buflen >= 65536) return -1;
+				buffer[buflen++] = 0;
+				pattern->n_entries++;
+				pattern->n_rows++;
+			}
+			break;
+		}
+	}
+
+	pattern->entry = malloc(pattern->n_entries * sizeof(*pattern->entry));
+
+	if (!pattern->entry)
+		return -1;
+
+	entry = pattern->entry;
+
+	/* Pass 2: decode the buffered bytes. */
+	while (bufpos < buflen) {
+		unsigned char b = buffer[bufpos++];
+
+		if (!(b & 224)) {
+			/* End of row. Pass 1 counts every control byte without field
+			 * bits as a row terminator, so the same test is used here.
+			 * Testing only for b == 0 left the two passes disagreeing on
+			 * bytes 1..31: fewer entries were written than counted and the
+			 * tail of the array stayed uninitialised.
+			 */
+			IT_SET_END_ROW(entry);
+			entry++;
+			continue;
+		}
+
+		entry->mask = 0;
+		entry->channel = b & 31;
+
+		if (b & 32) {
+			unsigned char n = buffer[bufpos++];
+			/* Only 254 and 1..120 produce a note; 254 maps to IT_NOTE_CUT. */
+			if (n == 254 || (n >= 1 && n <= 120)) {
+				if (n == 254)
+					entry->note = IT_NOTE_CUT;
+				else
+					entry->note = n - 1;
+				entry->mask |= IT_ENTRY_NOTE;
+			}
+
+			entry->instrument = buffer[bufpos++];
+			if (entry->instrument)
+				entry->mask |= IT_ENTRY_INSTRUMENT;
+		}
+
+		if (b & 64) {
+			effect = buffer[bufpos++];
+			effectvalue = buffer[bufpos++];
+			_dumb_it_ptm_convert_effect(effect, effectvalue, entry);
+		}
+
+		if (b & 128) {
+			entry->volpan = buffer[bufpos++];
+			if (entry->volpan <= 64)
+				entry->mask |= IT_ENTRY_VOLPAN;
+		}
+
+		entry++;
+	}
+
+	ASSERT(entry == pattern->entry + pattern->n_entries);
+
+	return 0;
+}
+
+
+
+/** WARNING: this is duplicated in itread.c - also bad practice to use the same struct name unless they are unified in a header */
+/* Currently we assume the sample data are stored after the sample headers in
+ * module files. This assumption may be unjustified; let me know if you have
+ * trouble.
+ */
+
+/* Kinds of file component; presumably the values stored in
+ * PTM_COMPONENT.type (TODO confirm against it_ptm_load_sigdata).
+ */
+#define PTM_COMPONENT_INSTRUMENT 1
+#define PTM_COMPONENT_PATTERN    2
+#define PTM_COMPONENT_SAMPLE     3
+
+/* One component of the module file, tagged with where it lives in the file.
+ * Arrays of these are ordered by offset via ptm_component_compare().
+ */
+typedef struct PTM_COMPONENT
+{
+	unsigned char type;   /* presumably one of the PTM_COMPONENT_* values above */
+	unsigned char n;      /* presumably the index of the item within its kind - TODO confirm */
+	int32 offset;         /* sort key compared by ptm_component_compare() */
+}
+PTM_COMPONENT;
+
+/* qsort() comparator: ascending file offset; compares rather than subtracts so it cannot overflow. */
+static int CDECL ptm_component_compare(const void *e1, const void *e2)
+{
+	int32 a = ((const PTM_COMPONENT *)e1)->offset;
+	int32 b = ((const PTM_COMPONENT *)e2)->offset;
+	return (a > b) - (a < b);
+}
+
+/* Parse a PTM module from f into a newly allocated DUMB_IT_SIGDATA.
+ * Returns NULL, after releasing what was allocated here, on any failure. */
+static DUMB_IT_SIGDATA *it_ptm_load_sigdata(DUMBFILE *f)
+{
+	DUMB_IT_SIGDATA *sigdata;
+
+	PTM_COMPONENT *component;
+	int n_components = 0;
+
+	int n;
+
+	unsigned char *buffer;
+
+	sigdata = malloc(sizeof(*sigdata));
+	if (!sigdata) return NULL;
+
+	/* Read the 28-byte song name and NUL-terminate it. */
+    dumbfile_getnc((char *)sigdata->name, 28, f);
+	sigdata->name[28] = 0;
+	/* Expect byte 0x1A then word 0x203; pointer members are unset so far, hence plain free(). */
+	if (dumbfile_getc(f) != 0x1A || dumbfile_igetw(f) != 0x203) {
+		free(sigdata);
+		return NULL;
+	}
+
+	dumbfile_skip(f, 1);
+	/* Clear the pointer members before any error path calls _dumb_it_unload_sigdata(). */
+	sigdata->song_message = NULL;
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->sample = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->n_orders = dumbfile_igetw(f);
+	sigdata->n_instruments = 0;
+	sigdata->n_samples = dumbfile_igetw(f);
+	sigdata->n_patterns = dumbfile_igetw(f);
+	/* Limits: at least one order, at most 255 samples and 128 patterns. */
+	if (dumbfile_error(f) || sigdata->n_orders <= 0 || sigdata->n_samples > 255 || sigdata->n_patterns > 128) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	sigdata->n_pchannels = dumbfile_igetw(f);
+	/* The next word must be zero. */
+	if (dumbfile_igetw(f) != 0) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	dumbfile_skip(f, 2);
+	/* The format tag must read "PTMF". */
+	if (dumbfile_mgetl(f) != DUMB_ID('P','T','M','F')) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	dumbfile_skip(f, 16);
+	/* One byte per order. */
+	sigdata->order = malloc(sigdata->n_orders);
+	if (!sigdata->order) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	if (sigdata->n_samples) {
+		sigdata->sample = malloc(sigdata->n_samples * sizeof(*sigdata->sample));
+		if (!sigdata->sample) {
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+		for (n = 0; n < sigdata->n_samples; n++)
+			sigdata->sample[n].data = NULL;
+	}
+
+	if (sigdata->n_patterns) {
+		sigdata->pattern = malloc(sigdata->n_patterns * sizeof(*sigdata->pattern));
+		if (!sigdata->pattern) {
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+		for (n = 0; n < sigdata->n_patterns; n++)
+			sigdata->pattern[n].entry = NULL;
+	}
+
+	/** WARNING: which ones? */
+	sigdata->flags = IT_STEREO | IT_OLD_EFFECTS | IT_COMPATIBLE_GXX | IT_WAS_A_PTM;
+	/* Fixed playback defaults; none of these four are read from the file. */
+	sigdata->global_volume = 128;
+	sigdata->speed = 6;
+	sigdata->tempo = 125;
+	sigdata->mixing_volume = 48;
+
+	/* Panning positions for 32 channels */
+	{
+		int i;
+		for (i = 0; i < 32; i++) {
+			int c = dumbfile_getc(f);
+			if (c <= 15) { /* values up to 15 are kept as a pan position, rescaled further down */
+				sigdata->channel_volume[i] = 64;
+				sigdata->channel_pan[i] = c;
+			} else {
+				/** WARNING: this could be improved if we support channel muting... */
+				sigdata->channel_volume[i] = 0;
+				sigdata->channel_pan[i] = 7;
+			}
+		}
+	}
+
+	/* Orders, byte each, length = sigdata->n_orders (should be even) */
+    dumbfile_getnc((char *)sigdata->order, sigdata->n_orders, f);
+	sigdata->restart_position = 0;
+	/* 768 slots; the limits checked above allow at most 128 + 255 entries. */
+	component = malloc(768*sizeof(*component));
+	if (!component) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+	/* Pattern table at file position 352: one word per pattern, in units of 16 bytes. */
+    if (dumbfile_seek(f, 352, DFS_SEEK_SET)) {
+		free(component);
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	for (n = 0; n < sigdata->n_patterns; n++) {
+		component[n_components].type = PTM_COMPONENT_PATTERN;
+		component[n_components].n = n;
+		component[n_components].offset = dumbfile_igetw(f) << 4;
+		n_components++;
+	}
+	/* Sample headers are read from file position 608; each call fills in a data offset. */
+    if (dumbfile_seek(f, 608, DFS_SEEK_SET)) {
+		free(component);
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	for (n = 0; n < sigdata->n_samples; n++) {
+		if (it_ptm_read_sample_header(&sigdata->sample[n], &component[n_components].offset, f)) {
+			free(component);
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+		if (!(sigdata->sample[n].flags & IT_SAMPLE_EXISTS)) continue; /* slot is reused by the next sample */
+		component[n_components].type = PTM_COMPONENT_SAMPLE;
+		component[n_components].n = n;
+		n_components++;
+	}
+	/* Sort by offset so the loop further down visits the file in ascending order. */
+	qsort(component, n_components, sizeof(PTM_COMPONENT), &ptm_component_compare);
+	/* Rescale pan 0..15 onto 0..64 (7 and 8 both become 32), clamping at 64. */
+	{
+		int i;
+		for (i = 0; i < 32; i++) {
+			sigdata->channel_pan[i] -= (sigdata->channel_pan[i] & 8) >> 3;
+			sigdata->channel_pan[i] = ((int)sigdata->channel_pan[i] << 5) / 7;
+			if (sigdata->channel_pan[i] > 64) sigdata->channel_pan[i] = 64;
+		}
+	}
+
+	sigdata->pan_separation = 128;
+	/* Catch any read error left over from the header parsing above. */
+	if (dumbfile_error(f)) {
+		free(component);
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+	/* Scratch space handed to it_ptm_read_pattern(). */
+	buffer = malloc(65536);
+	if (!buffer) {
+		free(component);
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	for (n = 0; n < n_components; n++) {
+        if (dumbfile_seek(f, component[n].offset, DFS_SEEK_SET)) {
+			free(buffer);
+			free(component);
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+
+		switch (component[n].type) {
+
+			case PTM_COMPONENT_PATTERN: /* last argument: gap to the next component, or 0 for the final one */
+				if (it_ptm_read_pattern(&sigdata->pattern[component[n].n], f, buffer, (n + 1 < n_components) ? (component[n+1].offset - component[n].offset) : 0)) {
+					free(buffer);
+					free(component);
+					_dumb_it_unload_sigdata(sigdata);
+					return NULL;
+				}
+				break;
+
+			case PTM_COMPONENT_SAMPLE: /* second argument is nonzero only for the final component */
+				if (it_ptm_read_sample_data(&sigdata->sample[component[n].n], (n + 1 == n_components), f)) {
+					free(buffer);
+					free(component);
+					_dumb_it_unload_sigdata(sigdata);
+					return NULL;
+				}
+		}
+	}
+
+	free(buffer);
+	free(component);
+
+	_dumb_it_fix_invalid_orders(sigdata);
+
+	return sigdata;
+}
+/* Load a PTM module from f and wrap it in a DUH carrying TITLE and FORMAT tags.
+ * Returns NULL when it_ptm_load_sigdata() fails. */
+DUH *DUMBEXPORT dumb_read_ptm_quick(DUMBFILE *f)
+{
+	DUH_SIGTYPE_DESC *descptr = &_dumb_sigtype_it;
+	sigdata_t *sigdata = it_ptm_load_sigdata(f);
+	const char *tag[2][2];
+
+	if (sigdata == NULL)
+		return NULL;
+
+	/* Each row is a name/value pair; the title points into sigdata itself. */
+	tag[0][0] = "TITLE";
+	tag[0][1] = (const char *)(((DUMB_IT_SIGDATA *)sigdata)->name);
+	tag[1][0] = "FORMAT";
+	tag[1][1] = "PTM";
+
+	/* make_duh() receives the two tag pairs plus a single
+	 * sigtype descriptor / sigdata pair. */
+	return make_duh(-1, 2, (const char *const (*)[2])tag, 1, &descptr, &sigdata);
+}
diff --git a/libraries/dumb/src/it/readriff.c b/libraries/dumb/src/it/readriff.c
new file mode 100644
index 000000000..4843f0527
--- /dev/null
+++ b/libraries/dumb/src/it/readriff.c
@@ -0,0 +1,57 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readriff.c - Code to read a RIFF module file       / / \  \
+ *              from memory.                         | <  /   \_
+ *                                                   |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+#include "internal/it.h"
+#include "internal/riff.h"
+
+
+DUH *dumb_read_riff_amff( DUMBFILE * f, struct riff * stream );
+DUH *dumb_read_riff_am( DUMBFILE * f, struct riff * stream );
+DUH *dumb_read_riff_dsmf( DUMBFILE * f, struct riff * stream );
+
+/* dumb_read_riff_quick(): reads a RIFF file into a DUH struct, returning a
+ * pointer to the DUH struct. When you have finished with it, you must pass
+ * the pointer to unload_duh() so that the memory can be freed.
+ */
+DUH *DUMBEXPORT dumb_read_riff_quick( DUMBFILE * f )
+{
+	DUH * result = 0;
+	long total = dumbfile_get_size(f);
+	struct riff * tree;
+
+	/* First attempt passes 1 as the final riff_parse() argument; retry with 0. */
+	tree = riff_parse( f, 0, total, 1 );
+	if ( tree == 0 )
+		tree = riff_parse( f, 0, total, 0 );
+	if ( tree == 0 )
+		return 0;
+
+	/* Dispatch on the parsed type; unrecognised types leave result at 0. */
+	if ( tree->type == DUMB_ID( 'A', 'M', ' ', ' ' ) )
+		result = dumb_read_riff_am( f, tree );
+	else if ( tree->type == DUMB_ID( 'A', 'M', 'F', 'F' ) )
+		result = dumb_read_riff_amff( f, tree );
+	else if ( tree->type == DUMB_ID( 'D', 'S', 'M', 'F' ) )
+		result = dumb_read_riff_dsmf( f, tree );
+
+	riff_free( tree );
+
+	return result;
+}
diff --git a/libraries/dumb/src/it/reads3m.c b/libraries/dumb/src/it/reads3m.c
new file mode 100644
index 000000000..43b6128a4
--- /dev/null
+++ b/libraries/dumb/src/it/reads3m.c
@@ -0,0 +1,766 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * reads3m.c - Code to read a ScreamTracker 3         / / \  \
+ *             module from an open file.             | <  /   \_
+ *                                                   |  \/ /\   /
+ * By entheh.                                         \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+// IT_STEREO... :o
+#include <stdlib.h>
+#include <string.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+/* Read one S3M sample header from f into *sample; returns nonzero on failure. */
+static int it_s3m_read_sample_header(IT_SAMPLE *sample, int32 *offset, unsigned char *pack, int cwtv, DUMBFILE *f)
+{
+	unsigned char type;
+	int flags;
+	/* *offset and *pack are only written when the type byte is 0 or 1. */
+	type = dumbfile_getc(f);
+
+    dumbfile_getnc((char *)sample->filename, 12, f);
+	sample->filename[12] = 0;
+	/* Types above 1: skip the rest of the header, keeping only the name. */
+	if (type > 1) {
+		/** WARNING: no adlib support */
+		dumbfile_skip(f, 3 + 12 + 1 + 1 + 2 + 2 + 2 + 12);
+        dumbfile_getnc((char *)sample->name, 28, f);
+		sample->name[28] = 0;
+		dumbfile_skip(f, 4);
+		sample->flags &= ~IT_SAMPLE_EXISTS; /* NOTE(review): flags has not been assigned in this function before this point */
+		return dumbfile_error(f);
+	}
+	/* Data position: one byte shifted left by 20 plus one word shifted left by 4. */
+	*offset = dumbfile_getc(f) << 20;
+	*offset += dumbfile_igetw(f) << 4;
+
+	sample->length = dumbfile_igetl(f);
+	sample->loop_start = dumbfile_igetl(f);
+	sample->loop_end = dumbfile_igetl(f);
+
+	sample->default_volume = dumbfile_getc(f);
+
+	dumbfile_skip(f, 1);
+	/* Packing byte: only 0 and 4 are accepted. */
+	flags = dumbfile_getc(f);
+
+	if (flags < 0 || (flags != 0 && flags != 4))
+		/* Sample is packed apparently (or error reading from file). We don't
+		 * know how to read packed samples.
+		 */
+		return -1;
+
+	*pack = flags;
+	/* Second flags byte, tested below: bit 0 = loop, bit 1 = stereo, bit 2 = 16-bit. */
+	flags = dumbfile_getc(f);
+
+	sample->C5_speed = dumbfile_igetl(f) << 1; /* the stored value is doubled */
+
+	/* Skip four unused bytes and three internal variables. */
+	dumbfile_skip(f, 4+2+2+4);
+
+    dumbfile_getnc((char *)sample->name, 28, f);
+	sample->name[28] = 0;
+
+	if (type == 0 || sample->length <= 0) {
+		/* Looks like no-existy. Anyway, there's for sure no 'SCRS' ... */
+		sample->flags &= ~IT_SAMPLE_EXISTS; /* NOTE(review): same unassigned-flags read as above */
+		return dumbfile_error(f);
+	}
+
+	if (dumbfile_mgetl(f) != DUMB_ID('S','C','R','S'))
+		return -1;
+
+	sample->global_volume = 64;
+
+	sample->flags = IT_SAMPLE_EXISTS;
+	if (flags & 1) sample->flags |= IT_SAMPLE_LOOP;
+
+	/* The ST3 TECH.DOC is unclear on this, but IMAGO Orpheus is not. Piece of crap. */
+
+	if (flags & 2) {
+		sample->flags |= IT_SAMPLE_STEREO;
+
+		if ((cwtv & 0xF000) == 0x2000) { /* for this tracker-version family the lengths are halved */
+			sample->length >>= 1;
+			sample->loop_start >>= 1;
+			sample->loop_end >>= 1;
+		}
+	}
+
+	if (flags & 4) {
+		sample->flags |= IT_SAMPLE_16BIT;
+
+		if ((cwtv & 0xF000) == 0x2000) { /* halved again, so stereo 16-bit ends up quartered */
+			sample->length >>= 1;
+			sample->loop_start >>= 1;
+			sample->loop_end >>= 1;
+		}
+	}
+
+	sample->default_pan = 0; // 0 = don't use, or 160 = centre?
+	/* Sanitise the loop points against the sample length. */
+	if (sample->flags & IT_SAMPLE_LOOP) {
+		if ((unsigned int)sample->loop_end > (unsigned int)sample->length)
+			/*sample->flags &= ~IT_SAMPLE_LOOP;*/
+			sample->loop_end = sample->length;
+		else if ((unsigned int)sample->loop_start >= (unsigned int)sample->loop_end)
+			sample->flags &= ~IT_SAMPLE_LOOP;
+		else
+			/* ScreamTracker seems not to save what comes after the loop end
+			 * point, but rather to assume it is a duplicate of what comes at
+			 * the loop start point. I am not completely sure of this though.
+			 * It is easy to evade; simply truncate the sample.
+			 */
+			sample->length = sample->loop_end;
+	}
+
+
+	//Do we need to set all these?
+	sample->vibrato_speed = 0;
+	sample->vibrato_depth = 0;
+	sample->vibrato_rate = 0;
+	sample->vibrato_waveform = IT_VIBRATO_SINE;
+	sample->finetune = 0;
+	sample->max_resampling_quality = -1;
+
+	return dumbfile_error(f);
+}
+
+/* Allocate sample->data and fill it from f. pack == 4 selects the ADPCM4 reader;
+ * ffi != 1 flips the top bit of every stored value. Returns 0, or -1 on failure. */
+static int it_s3m_read_sample_data(IT_SAMPLE *sample, int ffi, unsigned char pack, DUMBFILE *f)
+{
+	int32 n;
+
+	int32 datasize = sample->length;
+	if (sample->flags & IT_SAMPLE_STEREO) datasize <<= 1;
+	/* datasize counts stored values (doubled for stereo); 16-bit doubles the byte size. */
+	sample->data = malloc(datasize * (sample->flags & IT_SAMPLE_16BIT ? 2 : 1));
+	if (!sample->data)
+		return -1;
+
+	if (pack == 4) {
+		if (_dumb_it_read_sample_data_adpcm4(sample, f) < 0)
+			return -1;
+	}
+	else if (sample->flags & IT_SAMPLE_STEREO) { /* first half of the file data fills even indices, second half odd ones */
+		if (sample->flags & IT_SAMPLE_16BIT) {
+			for (n = 0; n < datasize; n += 2)
+				((short *)sample->data)[n] = dumbfile_igetw(f);
+			for (n = 1; n < datasize; n += 2)
+				((short *)sample->data)[n] = dumbfile_igetw(f);
+		} else {
+			for (n = 0; n < datasize; n += 2)
+				((signed char *)sample->data)[n] = dumbfile_getc(f);
+			for (n = 1; n < datasize; n += 2)
+				((signed char *)sample->data)[n] = dumbfile_getc(f);
+		}
+	} else if (sample->flags & IT_SAMPLE_16BIT)
+		for (n = 0; n < sample->length; n++)
+			((short *)sample->data)[n] = dumbfile_igetw(f);
+	else
+		for (n = 0; n < sample->length; n++)
+			((signed char *)sample->data)[n] = dumbfile_getc(f);
+	/* One error check covers all of the reads above. */
+	if (dumbfile_error(f))
+		return -1;
+
+	if (ffi != 1) {
+		/* Convert to signed. */
+		if (sample->flags & IT_SAMPLE_16BIT)
+			for (n = 0; n < datasize; n++)
+				((short *)sample->data)[n] ^= 0x8000;
+		else
+			for (n = 0; n < datasize; n++)
+				((signed char *)sample->data)[n] ^= 0x80;
+	}
+
+	return 0;
+}
+
+/* Read one packed S3M pattern from f into *pattern, using buffer (which the
+ * checks below treat as 65536 bytes) as scratch. Returns 0, or -1 on failure. */
+static int it_s3m_read_pattern(IT_PATTERN *pattern, DUMBFILE *f, unsigned char *buffer)
+{
+	int length;
+	int buflen = 0;
+	int bufpos = 0;
+
+	IT_ENTRY *entry;
+
+	unsigned char channel;
+
+	/* Haha, this is hilarious!
+	 *
+	 * Well, after some experimentation, it seems that different S3M writers
+	 * define the format in different ways. The S3M docs say that the first
+	 * two bytes hold the "length of [the] packed pattern", and the packed
+	 * pattern data follow. Judging by the contents of ARMANI.S3M, packaged
+	 * with ScreamTracker itself, the measure of length _includes_ the two
+	 * bytes used to store the length; in other words, we should read
+	 * (length - 2) more bytes. However, aryx.s3m, packaged with ModPlug
+	 * Tracker, excludes these two bytes, so (length) more bytes must be
+	 * read.
+	 *
+	 * Call me crazy, but I just find it insanely funny that the format was
+	 * misunderstood in this way :D
+	 *
+	 * Now we can't just risk reading two extra bytes, because then we
+	 * overshoot, and DUMBFILEs don't support backward seeking (for a good
+	 * reason). Luckily, there is a way. We can read the data little by
+	 * little, and stop when we have 64 rows in memory. Provided we protect
+	 * against buffer overflow, this method should work with all sensibly
+	 * written S3M files. If you find one for which it does not work, please
+	 * let me know at entheh@users.sf.net so I can look at it.
+     *
+     * "for a good reason" ? What's this nonsense? -kode54
+     *
+	 */
+
+	length = dumbfile_igetw(f);
+	/* An I/O error or a zero length aborts the pattern. */
+	if (dumbfile_error(f) || !length)
+		return -1;
+
+	pattern->n_rows = 0;
+	pattern->n_entries = 0;
+
+	/* Read in the pattern data, little by little, and work out how many
+	 * entries we need room for. Sorry, but this is just so funny...
+	 */
+	for (;;) {
+		unsigned char b = buffer[buflen++] = dumbfile_getc(f);
+
+#if 1
+		static const unsigned char used[8] = {0, 2, 1, 3, 2, 4, 3, 5}; /* payload bytes after the head byte, indexed by its top three bits */
+		channel = b & 31;
+		b >>= 5;
+		pattern->n_entries++;
+		if (b) {
+			if (buflen + used[b] >= 65536) return -1;
+			if (buflen + used[b] <= length)
+                dumbfile_getnc((char *)buffer + buflen, used[b], f);
+			else
+				memset(buffer + buflen, 0, used[b]);
+			buflen += used[b];
+		} else {
+			/* End of row */
+			if (++pattern->n_rows == 64) break;
+			if (buflen >= 65536) return -1;
+		}
+#else
+		if (b == 0) {
+			/* End of row */
+			pattern->n_entries++;
+			if (++pattern->n_rows == 64) break;
+			if (buflen >= 65536) return -1;
+		} else {
+			static const unsigned char used[8] = {0, 2, 1, 3, 2, 4, 3, 5};
+			channel = b & 31;
+			b >>= 5;
+			if (b) {
+				pattern->n_entries++;
+				if (buflen + used[b] >= 65536) return -1;
+				dumbfile_getnc(buffer + buflen, used[b], f);
+				buflen += used[b];
+			}
+		}
+#endif
+
+		/* We have ensured that buflen < 65536 at this point, so it is safe
+		 * to iterate and read at least one more byte without checking.
+		 * However, now would be a good time to check for errors reading from
+		 * the file.
+		 */
+
+		if (dumbfile_error(f))
+			return -1;
+
+		/* Great. We ran out of data, but there should be data for more rows.
+		 * Fill the rest with null data...
+		 */
+		if (buflen >= length && pattern->n_rows < 64)
+		{
+			while (pattern->n_rows < 64)
+			{
+				if (buflen >= 65536) return -1;
+				buffer[buflen++] = 0;
+				pattern->n_entries++;
+				pattern->n_rows++;
+			}
+			break;
+		}
+	}
+	/* Second pass: decode the buffered bytes into an entry array of the size counted above. */
+	pattern->entry = malloc(pattern->n_entries * sizeof(*pattern->entry));
+
+	if (!pattern->entry)
+		return -1;
+
+	entry = pattern->entry;
+
+	while (bufpos < buflen) {
+		unsigned char b = buffer[bufpos++];
+
+#if 1
+		if (!(b & ~31))
+#else
+		if (b == 0)
+#endif
+		{
+			/* End of row */
+			IT_SET_END_ROW(entry);
+			entry++;
+			continue;
+		}
+
+		channel = b & 31;
+
+		if (b & 224) {
+			entry->mask = 0;
+			entry->channel = channel;
+
+			if (b & 32) { /* note byte followed by instrument byte */
+				unsigned char n = buffer[bufpos++];
+				if (n != 255) {
+					if (n == 254)
+						entry->note = IT_NOTE_CUT;
+					else
+						entry->note = (n >> 4) * 12 + (n & 15);
+					entry->mask |= IT_ENTRY_NOTE;
+				}
+
+				entry->instrument = buffer[bufpos++];
+				if (entry->instrument)
+					entry->mask |= IT_ENTRY_INSTRUMENT;
+			}
+
+			if (b & 64) { /* volume/pan byte; 255 leaves the mask bit clear */
+				entry->volpan = buffer[bufpos++];
+				if (entry->volpan != 255)
+					entry->mask |= IT_ENTRY_VOLPAN;
+			}
+
+			if (b & 128) { /* effect byte followed by its value byte */
+				entry->effect = buffer[bufpos++];
+				entry->effectvalue = buffer[bufpos++];
+				// XXX woot
+				if (entry->effect && entry->effect < IT_MIDI_MACRO /*!= 255*/) {
+					entry->mask |= IT_ENTRY_EFFECT;
+					switch (entry->effect) {
+					case IT_BREAK_TO_ROW:
+						entry->effectvalue -= (entry->effectvalue >> 4) * 6; /* hi*16+lo becomes hi*10+lo */
+						break;
+
+					case IT_SET_CHANNEL_VOLUME:
+					case IT_CHANNEL_VOLUME_SLIDE:
+					case IT_PANNING_SLIDE:
+					case IT_GLOBAL_VOLUME_SLIDE:
+					case IT_PANBRELLO:
+					case IT_MIDI_MACRO: /* not reachable: the enclosing test requires effect < IT_MIDI_MACRO */
+						entry->mask &= ~IT_ENTRY_EFFECT; /* these effects are dropped */
+						break;
+
+					case IT_S:
+						switch (entry->effectvalue >> 4) {
+						case IT_S_SET_PANBRELLO_WAVEFORM:
+						case IT_S_FINE_PATTERN_DELAY:
+						case IT_S7:
+						case IT_S_SET_SURROUND_SOUND:
+						case IT_S_SET_MIDI_MACRO:
+							entry->mask &= ~IT_ENTRY_EFFECT;
+							break;
+						}
+						break;
+					}
+				}
+				/** WARNING: ARGH! CONVERT TEH EFFECTS!@~ */
+			}
+
+			entry++;
+		}
+	}
+
+	ASSERT(entry == pattern->entry + pattern->n_entries);
+
+	return 0;
+}
+
+
+
+/** WARNING: this is duplicated in itread.c - also bad practice to use the same struct name unless they are unified in a header */
+/* Currently we assume the sample data are stored after the sample headers in
+ * module files. This assumption may be unjustified; let me know if you have
+ * trouble.
+ */
+
+#define S3M_COMPONENT_INSTRUMENT 1 /* not assigned by it_s3m_load_sigdata() */
+#define S3M_COMPONENT_PATTERN    2 /* offset is where the packed pattern is read from */
+#define S3M_COMPONENT_SAMPLE     3 /* offset starts as the header position, later the data position */
+/* One loadable chunk of an S3M file; the loader sorts these by offset. */
+typedef struct S3M_COMPONENT
+{
+	unsigned char type;   /* one of the S3M_COMPONENT_* values above */
+	unsigned char n;      /* index into sigdata->pattern[] or sigdata->sample[] */
+	int32 offset;         /* position handed to dumbfile_seek(..., DFS_SEEK_SET) */
+	short sampfirst; /* component[sampfirst] = first sample data after this */
+	short sampnext; /* sampnext is used to create linked lists of sample data */
+}
+S3M_COMPONENT;
+
+/* qsort() comparator: ascending file offset; compares rather than subtracts so it cannot overflow. */
+static int CDECL s3m_component_compare(const void *e1, const void *e2)
+{
+	int32 a = ((const S3M_COMPONENT *)e1)->offset;
+	int32 b = ((const S3M_COMPONENT *)e2)->offset;
+	return (a > b) - (a < b);
+}
+
+/* Parse an S3M module from f into a newly allocated DUMB_IT_SIGDATA; the
+ * tracker version word is stored in *cwtv. Returns NULL on any failure. */
+static DUMB_IT_SIGDATA *it_s3m_load_sigdata(DUMBFILE *f, int * cwtv)
+{
+	DUMB_IT_SIGDATA *sigdata;
+
+	int flags, ffi;
+	int default_pan_present;
+
+	int master_volume;
+
+	unsigned char sample_pack[256];
+
+	S3M_COMPONENT *component;
+	int n_components = 0;
+
+	int n;
+
+	unsigned char *buffer;
+
+	sigdata = malloc(sizeof(*sigdata));
+	if (!sigdata) return NULL;
+
+    dumbfile_getnc((char *)sigdata->name, 28, f);
+	sigdata->name[28] = 0;
+	/* The byte after the name must be 0x1A or 0, and the next byte must be 16. */
+	n = dumbfile_getc(f);
+
+	if (n != 0x1A && n != 0) {
+		free(sigdata);
+		return NULL;
+	}
+
+	if (dumbfile_getc(f) != 16) {
+		free(sigdata);
+		return NULL;
+	}
+
+	dumbfile_skip(f, 2);
+	/* Clear the pointer members before any error path calls _dumb_it_unload_sigdata(). */
+	sigdata->song_message = NULL;
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->sample = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->n_orders = dumbfile_igetw(f);
+	sigdata->n_instruments = 0;
+	sigdata->n_samples = dumbfile_igetw(f);
+	sigdata->n_patterns = dumbfile_igetw(f);
+	/* These limits keep indices within unsigned char and within sample_pack[256]. */
+	if (dumbfile_error(f) || sigdata->n_orders <= 0 || sigdata->n_samples > 256 || sigdata->n_patterns > 256) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	sigdata->order = malloc(sigdata->n_orders);
+	if (!sigdata->order) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	if (sigdata->n_samples) {
+		sigdata->sample = malloc(sigdata->n_samples * sizeof(*sigdata->sample));
+		if (!sigdata->sample) {
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+		for (n = 0; n < sigdata->n_samples; n++)
+			sigdata->sample[n].data = NULL;
+	}
+
+	if (sigdata->n_patterns) {
+		sigdata->pattern = malloc(sigdata->n_patterns * sizeof(*sigdata->pattern));
+		if (!sigdata->pattern) {
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+		for (n = 0; n < sigdata->n_patterns; n++)
+			sigdata->pattern[n].entry = NULL;
+	}
+	/* The header flags word is read to stay in step with the file but is not used. */
+	flags = dumbfile_igetw(f);
+
+	*cwtv = dumbfile_igetw(f);
+
+	if (*cwtv == 0x1300) {
+		/** WARNING: volume slides on every frame */
+	}
+
+	ffi = dumbfile_igetw(f);
+
+	/** WARNING: which ones? */
+	sigdata->flags = IT_OLD_EFFECTS | IT_COMPATIBLE_GXX | IT_WAS_AN_S3M;
+
+	if (dumbfile_mgetl(f) != DUMB_ID('S','C','R','M')) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+	/* A zero or out-of-range global volume falls back to 64. */
+	sigdata->global_volume = dumbfile_getc(f);
+	if ( !sigdata->global_volume || sigdata->global_volume > 64 ) sigdata->global_volume = 64;
+	sigdata->speed = dumbfile_getc(f);
+	if (sigdata->speed == 0) sigdata->speed = 6; // Should we? What about tempo?
+	sigdata->tempo = dumbfile_getc(f);
+	master_volume = dumbfile_getc(f); // 7 bits; +128 for stereo
+	sigdata->mixing_volume = master_volume & 127;
+
+	if (master_volume & 128) sigdata->flags |= IT_STEREO;
+
+	/* Skip GUS Ultra Click Removal byte. */
+	dumbfile_getc(f);
+
+	default_pan_present = dumbfile_getc(f);
+
+	dumbfile_skip(f, 8);
+
+	/* Skip Special Custom Data Pointer. */
+	/** WARNING: investigate this? */
+	dumbfile_igetw(f);
+
+	sigdata->n_pchannels = 0;
+	/* Channel settings for 32 channels, 255=unused, +128=disabled */
+	{
+		int i;
+		int sep = (7 * dumb_it_default_panning_separation + 50) / 100;
+		for (i = 0; i < 32; i++) {
+			int c = dumbfile_getc(f);
+			if (!(c & (128 | 16))) { /* +128=disabled, +16=Adlib */
+				if (sigdata->n_pchannels < i + 1) sigdata->n_pchannels = i + 1;
+				sigdata->channel_volume[i] = 64;
+				sigdata->channel_pan[i] = c & 8 ? 7 + sep : 7 - sep;
+				/** WARNING: ah, but it should be 7 for mono... */
+			} else {
+				/** WARNING: this could be improved if we support channel muting... */
+				sigdata->channel_volume[i] = 0;
+				sigdata->channel_pan[i] = 7;
+			}
+		}
+	}
+
+	/* Orders, byte each, length = sigdata->n_orders (should be even) */
+	dumbfile_getnc((char *)sigdata->order, sigdata->n_orders, f);
+	sigdata->restart_position = 0;
+	/* 768 slots; the limits checked above allow at most 256 + 256 entries. */
+	component = malloc(768*sizeof(*component));
+	if (!component) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+	/* Sample header positions: one word each, in units of 16 bytes. */
+	for (n = 0; n < sigdata->n_samples; n++) {
+		component[n_components].type = S3M_COMPONENT_SAMPLE;
+		component[n_components].n = n;
+		component[n_components].offset = dumbfile_igetw(f) << 4;
+		component[n_components].sampfirst = -1;
+		n_components++;
+	}
+	/* Pattern positions use the same encoding; zero gets no component at all. */
+	for (n = 0; n < sigdata->n_patterns; n++) {
+		int32 offset = dumbfile_igetw(f) << 4;
+		if (offset) {
+			component[n_components].type = S3M_COMPONENT_PATTERN;
+			component[n_components].n = n;
+			component[n_components].offset = offset;
+			component[n_components].sampfirst = -1;
+			n_components++;
+		} else {
+			/** WARNING: Empty 64-row pattern ... ? (this does happen!) */
+			sigdata->pattern[n].n_rows = 64;
+			sigdata->pattern[n].n_entries = 0;
+		}
+	}
+	/* Sort so the main loop below visits the file in ascending offset order. */
+	qsort(component, n_components, sizeof(S3M_COMPONENT), &s3m_component_compare);
+
+	/* I found a really dumb S3M file that claimed to contain default pan
+	 * data but didn't contain any. Programs would load it by reading part of
+	 * the first instrument header as pan positions and then rereading the
+	 * instrument. We cannot do that, so the pans are only read when they end
+	 * before the first component. With no components at all, component[0]
+	 * is uninitialised and must not be inspected; the pans are skipped.
+	 */
+	if (default_pan_present == 252 && n_components > 0 && component[0].offset >= dumbfile_pos(f) + 32) {
+		/* Channel default pan positions */
+		int i;
+		for (i = 0; i < 32; i++) {
+			int c = dumbfile_getc(f);
+			if (c & 32)
+				sigdata->channel_pan[i] = c & 15;
+		}
+	}
+	/* Rescale each pan: values 8 and above drop by one, then scale by 32/7. */
+	{
+		int i;
+		for (i = 0; i < 32; i++) {
+			sigdata->channel_pan[i] -= (sigdata->channel_pan[i] & 8) >> 3;
+			sigdata->channel_pan[i] = ((int)sigdata->channel_pan[i] << 5) / 7;
+		}
+	}
+
+	sigdata->pan_separation = 128;
+	/* Catch any read error left over from the header parsing above. */
+	if (dumbfile_error(f)) {
+		free(component);
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+	/* Scratch space handed to it_s3m_read_pattern(). */
+	buffer = malloc(65536);
+	if (!buffer) {
+		free(component);
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	for (n = 0; n < n_components; n++) {
+		int32 offset;
+		int m;
+
+		offset = 0;
+		if (dumbfile_seek(f, component[n].offset, DFS_SEEK_SET)) {
+			free(buffer);
+			free(component);
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+
+		switch (component[n].type) {
+
+			case S3M_COMPONENT_PATTERN:
+				if (it_s3m_read_pattern(&sigdata->pattern[component[n].n], f, buffer)) {
+					free(buffer);
+					free(component);
+					_dumb_it_unload_sigdata(sigdata);
+					return NULL;
+				}
+				break;
+
+			case S3M_COMPONENT_SAMPLE:
+				if (it_s3m_read_sample_header(&sigdata->sample[component[n].n], &offset, &sample_pack[component[n].n], *cwtv, f)) {
+					free(buffer);
+					free(component);
+					_dumb_it_unload_sigdata(sigdata);
+					return NULL;
+				}
+				/* Queue the sample data on the last component starting at or before it. */
+				if (sigdata->sample[component[n].n].flags & IT_SAMPLE_EXISTS) {
+					short *sample;
+
+					for (m = n + 1; m < n_components; m++)
+						if (component[m].offset > offset)
+							break;
+					m--;
+
+					sample = &component[m].sampfirst;
+					/* Keep each queue ordered by ascending data offset. */
+					while (*sample >= 0 && component[*sample].offset <= offset)
+						sample = &component[*sample].sampnext;
+
+					component[n].sampnext = *sample;
+					*sample = n;
+					/* From here on this slot records where the sample data lives. */
+					component[n].offset = offset;
+				}
+		}
+		/* Load the data of every sample queued on this component. */
+		m = component[n].sampfirst;
+
+		while (m >= 0) {
+			// XXX
+			if (dumbfile_seek(f, component[m].offset, DFS_SEEK_SET)) {
+				free(buffer);
+				free(component);
+				_dumb_it_unload_sigdata(sigdata);
+				return NULL;
+			}
+
+			if (it_s3m_read_sample_data(&sigdata->sample[component[m].n], ffi, sample_pack[component[m].n], f)) {
+				free(buffer);
+				free(component);
+				_dumb_it_unload_sigdata(sigdata);
+				return NULL;
+			}
+
+			m = component[m].sampnext;
+		}
+	}
+
+	free(buffer);
+	free(component);
+
+	_dumb_it_fix_invalid_orders(sigdata);
+
+	return sigdata;
+}
+
+static char hexdigit(int in)
+{
+	/* Inputs below 10 are offset from '0'; anything else from 'A' (so 10 gives 'A'). */
+	return in + (in < 10 ? '0' : 'A' - 10);
+}
+/* Load an S3M from f into a DUH tagged TITLE/FORMAT/TRACKERVERSION; NULL if it_s3m_load_sigdata() fails. */
+DUH *DUMBEXPORT dumb_read_s3m_quick(DUMBFILE *f)
+{
+	DUH_SIGTYPE_DESC *descptr = &_dumb_sigtype_it;
+	const char *tag[3][2];
+	char version[8];
+	sigdata_t *sigdata;
+	int cwtv;
+
+	sigdata = it_s3m_load_sigdata(f, &cwtv);
+	if (sigdata == NULL)
+		return NULL;
+
+	/* Render the low three nibbles of cwtv as "X.YY" using hexdigit(). */
+	version[0] = hexdigit((cwtv >> 8) & 15);
+	version[1] = '.';
+	version[2] = hexdigit((cwtv >> 4) & 15);
+	version[3] = hexdigit(cwtv & 15);
+	version[4] = '\0';
+
+	tag[0][0] = "TITLE";
+	tag[0][1] = (const char *)(((DUMB_IT_SIGDATA *)sigdata)->name);
+	tag[1][0] = "FORMAT";
+	tag[1][1] = "S3M";
+	tag[2][0] = "TRACKERVERSION";
+	tag[2][1] = version;
+
+	/* make_duh() receives the three tag pairs plus one sigtype/sigdata pair. */
+	return make_duh(-1, 3, (const char *const (*)[2])tag, 1, &descptr, &sigdata);
+}
diff --git a/libraries/dumb/src/it/reads3m2.c b/libraries/dumb/src/it/reads3m2.c
new file mode 100644
index 000000000..e7d34de33
--- /dev/null
+++ b/libraries/dumb/src/it/reads3m2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * reads3m2.c - Function to read a ScreamTracker 3    / / \  \
+ *              module from an open file and do an   | <  /   \_
+ *              initial run-through.                 |  \/ /\   /
+ *                                                    \_  /  > /
+ * Split off from reads3m.c by entheh.                  | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+/* Load an S3M via dumb_read_s3m_quick(), then run dumb_it_do_initial_runthrough() on the result. */
+DUH *DUMBEXPORT dumb_read_s3m(DUMBFILE *f)
+{
+	DUH *duh = dumb_read_s3m_quick(f);
+	dumb_it_do_initial_runthrough(duh); /* NOTE(review): also reached when duh is NULL - confirm the callee tolerates that */
+	return duh;
+}
diff --git a/libraries/dumb/src/it/readstm.c b/libraries/dumb/src/it/readstm.c
new file mode 100644
index 000000000..a2ae69033
--- /dev/null
+++ b/libraries/dumb/src/it/readstm.c
@@ -0,0 +1,397 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readstm.c - Code to read a ScreamTracker 2         / / \  \
+ *             module from an open file.             | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Chris Moeller.                                  \_  /  > /
+ *                                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+// IT_STEREO... :o
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+#ifdef _MSC_VER
+	#define strnicmp _strnicmp
+#else
+	#if defined(unix) || defined(__unix__) || defined(__unix)
+		#include <strings.h>
+	#endif
+	#define strnicmp strncasecmp
+#endif
+
+static int it_stm_read_sample_header( IT_SAMPLE *sample, DUMBFILE *f, unsigned short *offset )
+{ /* Reads one STM sample header from f into *sample and its raw 16-bit data offset into *offset (0 if the sample is unused). Returns dumbfile_error(f). */
+    dumbfile_getnc( (char *) sample->filename, 12, f );
+	sample->filename[12] = 0;
+
+	memcpy( sample->name, sample->filename, 13 ); /* only one name field is read; it is used for both name and filename */
+
+	dumbfile_skip( f, 2 );
+
+	*offset = dumbfile_igetw( f ); /* the loader later seeks to offset * 16 to find the sample data */
+
+	sample->length = dumbfile_igetw( f );
+	sample->loop_start = dumbfile_igetw( f );
+	sample->loop_end = dumbfile_igetw( f );
+
+	sample->default_volume = dumbfile_getc( f );
+
+	dumbfile_skip( f, 1 );
+
+	sample->C5_speed = dumbfile_igetw( f ) << 3;
+
+	dumbfile_skip( f, 6 );
+
+	if ( sample->length < 4 || !sample->default_volume ) {
+		/* Looks like no-existy. */
+		sample->flags = 0; /* plain assignment: flags has not been initialised at this point, so masking it would read garbage */
+		sample->length = 0;
+		*offset = 0;
+		return dumbfile_error( f );
+	}
+
+	sample->flags = IT_SAMPLE_EXISTS;
+	sample->global_volume = 64;
+	sample->default_pan = 0; // 0 = don't use, or 160 = centre?
+
+	if ( ( sample->loop_start < sample->length ) &&
+		( sample->loop_end > sample->loop_start ) &&
+		( sample->loop_end != 0xFFFF ) ) {
+		sample->flags |= IT_SAMPLE_LOOP; /* loop only when start lies inside the sample, end is after start and end is not 0xFFFF */
+		if ( sample->loop_end > sample->length ) sample->loop_end = sample->length;
+	}
+
+	//Do we need to set all these?
+	sample->vibrato_speed = 0;
+	sample->vibrato_depth = 0;
+	sample->vibrato_rate = 0;
+	sample->vibrato_waveform = IT_VIBRATO_SINE;
+	sample->finetune = 0;
+	sample->max_resampling_quality = -1;
+
+	return dumbfile_error(f);
+}
+
+static int it_stm_read_sample_data( IT_SAMPLE *sample, DUMBFILE * f )
+{ /* Allocates sample->data and fills it with sample->length bytes from f. Returns 0 for an empty sample, -1 on allocation failure, else dumbfile_error(f). */
+	if ( ! sample->length ) return 0;
+
+	sample->data = malloc( sample->length );
+	if (!sample->data)
+		return -1;
+
+    dumbfile_getnc( sample->data, sample->length, f ); /* a short read is only reported through dumbfile_error() below */
+
+    return dumbfile_error( f );
+}
+
+static int it_stm_read_pattern( IT_PATTERN *pattern, DUMBFILE *f, unsigned char *buffer )
+{ /* Reads one 64-row, 4-channel STM pattern from f into *pattern, using buffer (at least 64*4*4 bytes) as scratch. Returns 0, or -1 on short read / allocation failure. */
+	int pos;
+	int channel;
+	int row;
+	IT_ENTRY *entry;
+
+	pattern->n_rows = 64;
+
+    if ( dumbfile_getnc( (char *) buffer, 64 * 4 * 4, f ) != 64 * 4 * 4 )
+		return -1;
+
+	pattern->n_entries = 64; /* upper bound: one end-of-row marker per row, plus one entry per non-empty cell counted below */
+	pos = 0;
+	for ( row = 0; row < 64; ++row ) {
+		for ( channel = 0; channel < 4; ++channel ) {
+			if ( buffer[ pos + 0 ] | buffer[ pos + 1 ] | buffer[ pos + 2 ] | buffer[ pos + 3 ] )
+				++pattern->n_entries;
+			pos += 4;
+		}
+	}
+
+	pattern->entry = malloc( pattern->n_entries * sizeof( *pattern->entry ) );
+	if ( !pattern->entry )
+		return -1;
+
+	entry = pattern->entry;
+	pos = 0;
+	for ( row = 0; row < 64; ++row ) {
+		for ( channel = 0; channel < 4; ++channel ) {
+			if ( buffer[ pos + 0 ] | buffer[ pos + 1 ] | buffer[ pos + 2 ] | buffer[ pos + 3 ] ) {
+				unsigned note;
+				note = buffer[ pos + 0 ];
+				entry->channel = channel;
+				entry->mask = 0;
+				entry->instrument = buffer[ pos + 1 ] >> 3;
+				entry->volpan = ( buffer[ pos + 1 ] & 0x07 ) + ( ( buffer[ pos + 2 ] & 0xF0 ) >> 1 ); /* low 3 volume bits come from byte 1, the rest from the upper nibble of byte 2; the lower nibble is the effect and must not leak in */
+				entry->effect = buffer[ pos + 2 ] & 0x0F;
+				entry->effectvalue = buffer[ pos + 3 ];
+				if ( entry->instrument && entry->instrument < 32 )
+					entry->mask |= IT_ENTRY_INSTRUMENT;
+				if ( note < 251 ) {
+					entry->mask |= IT_ENTRY_NOTE;
+					entry->note = ( note >> 4 ) * 12 + ( note & 0x0F ); /* upper nibble * 12 + lower nibble */
+				}
+				if ( entry->volpan <= 64 )
+					entry->mask |= IT_ENTRY_VOLPAN;
+				entry->mask |= IT_ENTRY_EFFECT;
+				switch ( entry->effect ) {
+                    case IT_SET_SPEED:
+                    /* taken care of in the renderer */
+                        break;
+
+					case IT_BREAK_TO_ROW:
+						entry->effectvalue -= (entry->effectvalue >> 4) * 6; /* turns a value of the form 0xTU into T * 10 + U */
+						break;
+
+					case IT_JUMP_TO_ORDER:
+					case IT_VOLUME_SLIDE:
+					case IT_PORTAMENTO_DOWN:
+					case IT_PORTAMENTO_UP:
+					case IT_TONE_PORTAMENTO:
+					case IT_VIBRATO:
+					case IT_TREMOR:
+					case IT_ARPEGGIO:
+					case IT_VOLSLIDE_VIBRATO:
+					case IT_VOLSLIDE_TONEPORTA:
+						break;
+
+					default:
+						entry->mask &= ~IT_ENTRY_EFFECT; /* any other effect number is dropped */
+						break;
+				}
+				if ( entry->mask ) ++entry; /* keep the entry only if something in it is usable */
+			}
+			pos += 4;
+		}
+		IT_SET_END_ROW(entry);
+		++entry;
+	}
+
+	pattern->n_entries = (int)(entry - pattern->entry); /* shrink the count to the entries actually written */
+
+	return 0;
+}
+
+
+
+static DUMB_IT_SIGDATA *it_stm_load_sigdata(DUMBFILE *f, int * version)
+{ /* Parses a whole STM module from f into a newly allocated DUMB_IT_SIGDATA and stores the word read by dumbfile_mgetw() in *version. Returns NULL on any failure. */
+	DUMB_IT_SIGDATA *sigdata;
+
+	char tracker_name[ 8 ];
+
+	unsigned short sample_offset[ 31 ];
+
+    int n;
+
+	sigdata = malloc(sizeof(*sigdata));
+	if (!sigdata) return NULL;
+
+	/* Read the 20-byte song name and terminate it. */
+    dumbfile_getnc((char *)sigdata->name, 20, f);
+	sigdata->name[20] = 0;
+
+	dumbfile_getnc(tracker_name, 8, f);
+	n = dumbfile_getc(f);
+	if ( n != 0x02 && n != 0x1A && n != 0x1B )
+	{
+		free( sigdata ); /* nothing else has been allocated yet, so a plain free() is enough on these early exits */
+		return NULL;
+	}
+	if ( dumbfile_getc(f) != 2 ) /* only support modules */
+	{
+		free( sigdata );
+		return NULL;
+	}
+	if ( strnicmp( tracker_name, "!Scream!", 8 ) &&
+		strnicmp( tracker_name, "BMOD2STM", 8 ) &&
+		strnicmp( tracker_name, "WUZAMOD!", 8 ) )
+	{
+		free( sigdata );
+		return NULL;
+	}
+
+	*version = dumbfile_mgetw(f);
+
+	sigdata->song_message = NULL; /* NULL every owned pointer so that _dumb_it_unload_sigdata() can be used on the error paths below */
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->sample = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->n_instruments = 0;
+	sigdata->n_samples = 31;
+	sigdata->n_pchannels = 4;
+
+    sigdata->tempo = 125;
+    sigdata->mixing_volume = 48;
+	sigdata->pan_separation = 128;
+
+	/** WARNING: which ones? */
+    sigdata->flags = IT_OLD_EFFECTS | IT_COMPATIBLE_GXX | IT_WAS_AN_S3M | IT_WAS_AN_STM | IT_STEREO;
+
+    n = dumbfile_getc(f);
+    if ( n < 32 ) n = 32; /* speed byte is clamped to a minimum of 32 */
+    sigdata->speed = n;
+    sigdata->n_patterns = dumbfile_getc(f);
+	sigdata->global_volume = dumbfile_getc(f) << 1;
+	if ( sigdata->global_volume > 128 ) sigdata->global_volume = 128;
+	
+	dumbfile_skip(f, 13);
+
+	if ( dumbfile_error(f) || sigdata->n_patterns < 1 || sigdata->n_patterns > 99 ) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	sigdata->sample = malloc(sigdata->n_samples * sizeof(*sigdata->sample));
+	if (!sigdata->sample) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+	for (n = 0; n < sigdata->n_samples; n++)
+		sigdata->sample[n].data = NULL; /* only .data is initialised here; the remaining fields are filled by the header reader */
+
+	if (sigdata->n_patterns) {
+		sigdata->pattern = malloc(sigdata->n_patterns * sizeof(*sigdata->pattern));
+		if (!sigdata->pattern) {
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+		for (n = 0; n < sigdata->n_patterns; n++)
+			sigdata->pattern[n].entry = NULL;
+	}
+
+	memset( sigdata->channel_volume, 64, 4 );
+	n = 32 * dumb_it_default_panning_separation / 100;
+	sigdata->channel_pan[ 0 ] = 32 + n; /* the four channels alternate between 32 + n and 32 - n */
+	sigdata->channel_pan[ 1 ] = 32 - n;
+	sigdata->channel_pan[ 2 ] = 32 + n;
+	sigdata->channel_pan[ 3 ] = 32 - n;
+
+	for ( n = 0; n < sigdata->n_samples; ++n ) {
+		if ( it_stm_read_sample_header( &sigdata->sample[ n ], f, &sample_offset[ n ] ) ) {
+			_dumb_it_unload_sigdata( sigdata );
+			return NULL;
+		}
+	}
+
+	sigdata->order = malloc( 128 );
+	if ( !sigdata->order ) {
+		_dumb_it_unload_sigdata( sigdata );
+		return NULL;
+	}
+
+	/* Order list, one byte per order: 128 entries when *version >= 0x200, otherwise 64 with the rest padded with 0xFF. */
+    dumbfile_getnc( (char *) sigdata->order, *version >= 0x200 ? 128 : 64, f );
+	if (*version < 0x200) memset( sigdata->order + 64, 0xFF, 64 );
+	sigdata->restart_position = 0;
+
+	for ( n = 127; n >= 0; --n ) { /* find the last order that refers to an existing pattern */
+		if ( sigdata->order[ n ] < sigdata->n_patterns ) break;
+	}
+	if ( n < 0 ) { /* no usable order at all */
+		_dumb_it_unload_sigdata( sigdata );
+		return NULL;
+	}
+	sigdata->n_orders = n + 1;
+
+	for ( n = 0; n < 128; ++n ) {
+		if ( sigdata->order[ n ] >= 99 ) sigdata->order[ n ] = 0xFF; /* values of 99 and above are all rewritten to 0xFF */
+	}
+
+	if ( sigdata->n_patterns ) {
+		unsigned char * buffer = malloc( 64 * 4 * 4 ); /* scratch space for one raw pattern, shared by all pattern reads */
+		if ( ! buffer ) {
+			_dumb_it_unload_sigdata( sigdata );
+			return NULL;
+		}
+		for ( n = 0; n < sigdata->n_patterns; ++n ) {
+			if ( it_stm_read_pattern( &sigdata->pattern[ n ], f, buffer ) ) {
+				free( buffer );
+				_dumb_it_unload_sigdata( sigdata );
+				return NULL;
+			}
+		}
+		free( buffer );
+	}
+
+    for ( n = 0; n < sigdata->n_samples; ++n ) {
+        if ( sample_offset[ n ] )
+        {
+            if ( dumbfile_seek( f, sample_offset[ n ] * 16, DFS_SEEK_SET ) || /* stored offsets are in units of 16 bytes */
+                 it_stm_read_sample_data( &sigdata->sample[ n ], f ) ) {
+                _dumb_it_unload_sigdata( sigdata );
+                return NULL;
+            }
+        }
+        else
+        {
+            sigdata->sample[ n ].flags = 0; /* offset 0 marks an unused sample */
+            sigdata->sample[ n ].length = 0;
+        }
+    }
+
+	_dumb_it_fix_invalid_orders(sigdata);
+
+	return sigdata;
+}
+
+DUH *DUMBEXPORT dumb_read_stm_quick(DUMBFILE *f)
+{ /* Loads an STM module from f and wraps it in a DUH carrying TITLE and FORMAT tags. Returns NULL if loading fails. */
+	sigdata_t *sigdata;
+	int ver;
+
+	DUH_SIGTYPE_DESC *descptr = &_dumb_sigtype_it;
+
+	sigdata = it_stm_load_sigdata(f , &ver);
+
+	if (!sigdata)
+		return NULL;
+
+	{
+		char version[16]; /* holds "STM vH.LL" or "STM vH.LLL", built by hand below */
+		const char *tag[2][2];
+		tag[0][0] = "TITLE";
+        tag[0][1] = (const char *)(((DUMB_IT_SIGDATA *)sigdata)->name);
+		tag[1][0] = "FORMAT";
+		version[0] = 'S';
+		version[1] = 'T';
+		version[2] = 'M';
+		version[3] = ' ';
+		version[4] = 'v';
+		version[5] = '0' + ((ver >> 8) & 15); /* low 4 bits of the high byte; values above 9 yield a non-digit character */
+		version[6] = '.';
+		if ((ver & 255) > 99) /* low byte printed in decimal: three digits here, two digits otherwise */
+		{
+			version[7] = '0' + ((ver & 255) / 100 );
+			version[8] = '0' + (((ver & 255) / 10) % 10);
+			version[9] = '0' + ((ver & 255) % 10);
+			version[10] = 0;
+		}
+		else
+		{
+			version[7] = '0' + ((ver & 255) / 10);
+			version[8] = '0' + ((ver & 255) % 10);
+			version[9] = 0;
+		}
+		tag[1][1] = (const char *) &version;
+		return make_duh(-1, 2, (const char *const (*)[2])tag, 1, &descptr, &sigdata); /* NOTE(review): version is a local buffer; presumably make_duh() copies the tag strings - confirm */
+	}
+}
diff --git a/libraries/dumb/src/it/readstm2.c b/libraries/dumb/src/it/readstm2.c
new file mode 100644
index 000000000..bd78eaf69
--- /dev/null
+++ b/libraries/dumb/src/it/readstm2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readstm2.c - Function to read a ScreamTracker 2    / / \  \
+ *              module from an open file and do an   | <  /   \_
+ *              initial run-through.                 |  \/ /\   /
+ *                                                    \_  /  > /
+ * By Chris Moeller.                                    | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+
+DUH *DUMBEXPORT dumb_read_stm(DUMBFILE *f)
+{ /* Loads the module with dumb_read_stm_quick(), then performs the initial run-through on the result. */
+	DUH *duh = dumb_read_stm_quick(f);
+	dumb_it_do_initial_runthrough(duh); /* NOTE(review): duh can be NULL when loading failed; presumably the callee accepts NULL - confirm */
+	return duh;
+}
diff --git a/libraries/dumb/src/it/readxm.c b/libraries/dumb/src/it/readxm.c
new file mode 100644
index 000000000..b26359f64
--- /dev/null
+++ b/libraries/dumb/src/it/readxm.c
@@ -0,0 +1,1530 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readxm.c - Code to read a Fast Tracker II          / / \  \
+ *            module from an open file.              | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Julien Cugniere. Some bits of code taken        \_  /  > /
+ * from reads3m.c.                                      | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+#include "internal/dumbfile.h"
+
+#include <stdlib.h>
+#include <assert.h>
+
+extern short *DUMBCALLBACK dumb_decode_vorbis(int outlen, const void *oggstream, int sizebytes);
+
+/** TODO:
+
+ * XM_TREMOLO                        doesn't sound quite right...
+ * XM_SET_ENVELOPE_POSITION          todo.
+
+ * VIBRATO conversion needs to be checked (sample/effect/volume). Plus check
+   that effect memory is correct when using XM_VOLSLIDE_VIBRATO.
+   - sample vibrato (instrument vibrato) is now handled correctly. - entheh
+
+ * XM_E_SET_VIBRATO/TREMOLO_CONTROL: effectvalue&4 -> don't retrig wave when
+   a new instrument is played. In retrigger_note()?. Is it worth implementing?
+
+ * Lossy fadeout approximation. 0..31 converted to 0 --> won't fade at all.
+
+ * Replace DUMB's sawtooth by ramp_down/ramp_up. Update XM loader.
+
+ * A lot of things need to be reset when the end of the song is reached.
+
+ * It seems that IT and XM don't behave the same way when dealing with
+   mixed loops. When IT encounters multiple SBx (x>0) commands on the same
+   row, it decrements the loop count for all, but only execute the loop of
+   the last one (highest channel). FT2 only decrements the loop count of the
+   last one. Not that I know of any modules using so convoluted combinations!
+
+ * Maybe we could remove patterns that don't appear in the order table ? Or
+   provide a function to "optimize" a DUMB_IT_SIGDATA ?
+
+*/
+
+
+
+#define XM_LINEAR_FREQUENCY        1 /* otherwise, use amiga slides */
+
+#define XM_ENTRY_PACKED            128
+#define XM_ENTRY_NOTE              1
+#define XM_ENTRY_INSTRUMENT        2
+#define XM_ENTRY_VOLUME            4
+#define XM_ENTRY_EFFECT            8
+#define XM_ENTRY_EFFECTVALUE       16
+
+#define XM_NOTE_OFF                97
+
+#define XM_ENVELOPE_ON             1
+#define XM_ENVELOPE_SUSTAIN        2
+#define XM_ENVELOPE_LOOP           4
+
+#define XM_SAMPLE_NO_LOOP          0
+#define XM_SAMPLE_FORWARD_LOOP     1
+#define XM_SAMPLE_PINGPONG_LOOP    2
+#define XM_SAMPLE_16BIT            16
+#define XM_SAMPLE_STEREO           32
+
+#define XM_VIBRATO_SINE            0
+#define XM_VIBRATO_SQUARE          1
+#define XM_VIBRATO_RAMP_DOWN       2
+#define XM_VIBRATO_RAMP_UP         3
+
+
+
+/* Table of IT_VIBRATO_* waveforms; the first four entries follow the order of the XM_VIBRATO_* values 0-3. Probably useless :) */
+const char xm_convert_vibrato[] = {
+	IT_VIBRATO_SINE,
+	IT_VIBRATO_XM_SQUARE,
+	IT_VIBRATO_RAMP_DOWN,
+	IT_VIBRATO_RAMP_UP,
+	IT_VIBRATO_RANDOM /* index 4 has no XM_VIBRATO_* constant in this file */
+};
+
+
+
+#define XM_MAX_SAMPLES_PER_INSTRUMENT 16
+
+
+
+/* Extra data that doesn't fit inside IT_INSTRUMENT */
+typedef struct XM_INSTRUMENT_EXTRA
+{
+	int n_samples; /* sample count from the instrument header; the reader rejects values above XM_MAX_SAMPLES_PER_INSTRUMENT */
+	int vibrato_type; /* the reader rejects values above 4 */
+	int vibrato_sweep; /* 0-0xFF */
+	int vibrato_depth; /* 0-0x0F */
+	int vibrato_speed; /* 0-0x3F */
+	int sample_header_size; /* the reader always sets this to 0x28 when the instrument has samples */
+}
+XM_INSTRUMENT_EXTRA;
+
+
+
+/* Trims off trailing white space, usually added by the tracker on file creation:
+ * walks back from ptr[size - 1] and overwrites every byte <= 0x20 with NUL. */
+static void trim_whitespace(char *ptr, size_t size)
+{
+	char *p = ptr + size - 1;
+	while (p >= ptr && *p <= 0x20) *p-- = '\0'; /* NOTE(review): where plain char is signed, bytes >= 0x80 also compare <= 0x20 and get trimmed - confirm this is intended */
+}
+
+/* Frees the original block if it can't resize it or if size is 0, and acts
+ * as malloc if ptr is NULL. Returns the new block, or NULL in the two freeing cases.
+ */
+static void *safe_realloc(void *ptr, size_t size)
+{
+	if (ptr == NULL)
+		return malloc(size);
+
+	if (size == 0) {
+		free(ptr);
+		return NULL;
+	} else {
+		void *new_block = realloc(ptr, size);
+		if (!new_block)
+			free(ptr); /* unlike plain realloc(), the caller never has to free ptr after a failure */
+		return new_block;
+	}
+}
+
+
+
+/* The interpretation of the XM volume column is left to the player. Here, we
+ * just filter bad values: volpan keeps the raw byte, IT_ENTRY_VOLPAN is set only for 0x10-0x50 and 0x60-0xFF.
+ */
+// This function is so tiny now, should we inline it?
+static void it_xm_convert_volume(int volume, IT_ENTRY *entry)
+{
+	entry->mask |= IT_ENTRY_VOLPAN; /* set optimistically; cleared again in the default case below */
+	entry->volpan = volume;
+
+	switch (volume >> 4) {
+		case 0xA: /* set vibrato speed */
+		case 0xB: /* vibrato */
+		case 0xF: /* tone porta */
+		case 0x6: /* vol slide up */
+		case 0x7: /* vol slide down */
+		case 0x8: /* fine vol slide up */
+		case 0x9: /* fine vol slide down */
+		case 0xC: /* set panning */
+		case 0xD: /* pan slide left */
+		case 0xE: /* pan slide right */
+		case 0x1: /* set volume */
+		case 0x2: /* set volume */
+		case 0x3: /* set volume */
+		case 0x4: /* set volume */
+			break;
+
+		case 0x5:
+			if (volume == 0x50)
+				break; /* set volume */
+			/* else fall through */
+
+		default:
+			entry->mask &= ~IT_ENTRY_VOLPAN; /* 0x00-0x0F and 0x51-0x5F end up here */
+			break;
+	}
+}
+
+
+
+static int it_xm_read_pattern(IT_PATTERN *pattern, DUMBFILE *f, int n_channels, unsigned char *buffer, int version)
+{ /* Reads one XM pattern (header plus packed data) from f into *pattern, using buffer as scratch for the packed bytes. Returns 0 on success, -1 on error. */
+	int size;
+	int pos;
+	int channel;
+	int row;
+	int effect, effectvalue;
+	IT_ENTRY *entry;
+
+	/* pattern header size */
+	if (dumbfile_igetl(f) != ( version == 0x0102 ? 0x08 : 0x09 ) ) {
+		TRACE("XM error: unexpected pattern header size\n");
+		return -1;
+	}
+
+	/* pattern data packing type */
+	if (dumbfile_getc(f) != 0) {
+		TRACE("XM error: unexpected pattern packing type\n");
+		return -1;
+	}
+
+	if ( version == 0x0102 )
+		pattern->n_rows = dumbfile_getc(f) + 1; /* version 0x0102 stores the row count minus one in a single byte */
+	else
+		pattern->n_rows = dumbfile_igetw(f);  /* 1..256 */
+	size = dumbfile_igetw(f);
+	pattern->n_entries = 0;
+
+	if (dumbfile_error(f))
+		return -1;
+
+	if (size == 0) /* no packed data: return with n_entries == 0 and pattern->entry untouched */
+		return 0;
+
+	if (size > 1280 * n_channels) { /* NOTE(review): presumably the caller sizes buffer to 1280 * n_channels bytes - confirm */
+		TRACE("XM error: pattern data size > %d bytes\n", 1280 * n_channels);
+		return -1;
+	}
+
+    if (dumbfile_getnc((char *)buffer, size, f) < size)
+		return -1;
+
+	/* compute number of entries */
+	pattern->n_entries = 0;
+	pos = channel = row = 0;
+	while (pos < size) {
+		if (!(buffer[pos] & XM_ENTRY_PACKED) || (buffer[pos] & 31)) /* a cell yields an entry unless it is packed with an empty mask */
+			pattern->n_entries++;
+
+		channel++;
+		if (channel >= n_channels) {
+			channel = 0;
+			row++;
+			pattern->n_entries++; /* room for the end-of-row marker */
+		}
+
+		if (buffer[pos] & XM_ENTRY_PACKED) {
+			static const char offset[] = { 0, 1, 1, 2, 1, 2, 2, 3,   1, 2, 2, 3, 2, 3, 3, 4,
+			                               1, 2, 2, 3, 2, 3, 3, 4,   2, 3, 3, 4, 3, 4, 4, 5 }; /* number of set bits in the 5-bit mask = bytes that follow */
+			pos += 1 + offset[buffer[pos] & 31];
+		} else {
+			pos += 5; /* unpacked cell: note, instrument, volume, effect, effect value */
+		}
+	}
+
+	if (row > pattern->n_rows) {
+		TRACE("XM error: wrong number of rows in pattern data\n");
+		return -1;
+	}
+
+	/* Whoops, looks like some modules may be short, a few channels, maybe even rows... */
+
+	while (row < pattern->n_rows) /* reserve an end-of-row marker for every row missing from the data */
+	{
+		pattern->n_entries++;
+		row++;
+	}
+
+	pattern->entry = malloc(pattern->n_entries * sizeof(*pattern->entry));
+	if (!pattern->entry)
+		return -1;
+
+	/* read the entries */
+	entry = pattern->entry;
+	pos = channel = row = 0;
+	while (pos < size) {
+		unsigned char mask;
+
+		if (buffer[pos] & XM_ENTRY_PACKED)
+			mask = buffer[pos++] & 31;
+		else
+			mask = 31; /* unpacked cell: all five fields are present */
+
+		if (mask) {
+			ASSERT(entry < pattern->entry + pattern->n_entries);
+
+			entry->channel = channel;
+			entry->mask = 0;
+
+			if (mask & XM_ENTRY_NOTE) {
+				int note = buffer[pos++]; /* 1-96 <=> C0-B7 */
+				entry->note = (note == XM_NOTE_OFF) ? (IT_NOTE_OFF) : (note-1);
+				entry->mask |= IT_ENTRY_NOTE;
+			}
+
+			if (mask & XM_ENTRY_INSTRUMENT) {
+				entry->instrument = buffer[pos++]; /* 1-128 */
+				entry->mask |= IT_ENTRY_INSTRUMENT;
+			}
+
+			if (mask & XM_ENTRY_VOLUME)
+				it_xm_convert_volume(buffer[pos++], entry);
+
+			effect = effectvalue = 0; /* a missing effect or value byte counts as 0 */
+			if (mask & XM_ENTRY_EFFECT)      effect = buffer[pos++];
+			if (mask & XM_ENTRY_EFFECTVALUE) effectvalue = buffer[pos++];
+			_dumb_it_xm_convert_effect(effect, effectvalue, entry, 0);
+
+			entry++;
+		}
+
+		channel++;
+		if (channel >= n_channels) {
+			channel = 0;
+			row++;
+			IT_SET_END_ROW(entry);
+			entry++;
+		}
+	}
+
+	while (row < pattern->n_rows) /* emit the end-of-row markers reserved above for the missing rows */
+	{
+		row++;
+		IT_SET_END_ROW(entry);
+		entry++;
+	}
+
+	return 0;
+}
+
+
+
+static int it_xm_make_envelope(IT_ENVELOPE *envelope, const unsigned short *data, int y_offset)
+{ /* Fills envelope->node_t/node_y from the (t, y) pairs in data, adding y_offset to each y, after sanitising node count and loop fields. Always returns 0. */
+    int i, pos, val;
+
+	if (envelope->n_nodes > 12) {
+		/* XXX
+		TRACE("XM error: wrong number of envelope nodes (%d)\n", envelope->n_nodes);
+		envelope->n_nodes = 0;
+		return -1; */
+		envelope->n_nodes = 12; /* clamp instead of failing */
+	}
+
+	if (envelope->sus_loop_start >= 12) envelope->flags &= ~IT_ENVELOPE_SUSTAIN_LOOP;
+	if (envelope->loop_end >= 12) envelope->loop_end = 0;
+	if (envelope->loop_start >= envelope->loop_end) envelope->flags &= ~IT_ENVELOPE_LOOP_ON; /* also catches loop_end having just been reset to 0 */
+
+	pos = 0;
+	for (i = 0; i < envelope->n_nodes; i++) {
+		envelope->node_t[i] = data[pos++];
+        val = data[pos++];
+        if (val > 64) {
+            TRACE("XM error: out-of-range envelope node (node_y[%d]=%d)\n", i, val);
+            /* FT2 seems to simply clip the value */
+            val = 64;
+		}
+        envelope->node_y[i] = (signed char)(val + y_offset);
+	}
+
+	return 0;
+}
+
+
+
+typedef struct LIMITED_XM LIMITED_XM;
+/* State behind the limit_xm_dfs callbacks: a buffered window in front of another DUMBFILE. */
+struct LIMITED_XM
+{
+	unsigned char *buffered; /* window buffer; NULL until the first non-empty resize */
+	long ptr, limit, allocated; /* read position in the window; size passed to the last resize; bytes allocated for buffered */
+	DUMBFILE *remaining; /* file the window is filled from; limit_xm_close() deliberately leaves it open */
+};
+
+static int DUMBCALLBACK limit_xm_resize(void *f, long n)
+{ /* Refills the window with the next n bytes of the underlying file and rewinds ptr to 0. f is the wrapping DUMBFILE, not the LIMITED_XM. Returns 0, or -1 on allocation failure / short read. */
+	DUMBFILE *df = f;
+	LIMITED_XM *lx = df->file;
+	if (lx->buffered || n) {
+		if (n > lx->allocated) { /* the buffer only ever grows */
+			unsigned char *buffered = realloc( lx->buffered, n );
+			if ( !buffered ) return -1;
+			lx->buffered = buffered;
+			memset( buffered + lx->allocated, 0, n - lx->allocated ); /* zero the newly added tail */
+			lx->allocated = n;
+		}
+        if ( dumbfile_getnc( (char *)lx->buffered, n, lx->remaining ) < n ) return -1;
+	} else if (!n) { /* only reached when buffered is NULL and n is 0, so there is nothing to free in practice */
+		if ( lx->buffered ) free( lx->buffered );
+		lx->buffered = NULL;
+		lx->allocated = 0;
+	}
+	lx->limit = n;
+	lx->ptr = 0;
+	return 0;
+}
+
+static int DUMBCALLBACK limit_xm_skip_end(void *f, int32 n)
+{ /* Skips n bytes on the underlying file, bypassing the window. f is the wrapping DUMBFILE. Returns the result of dumbfile_skip(). */
+	DUMBFILE *df = f;
+	LIMITED_XM *lx = df->file;
+	return dumbfile_skip( lx->remaining, n );
+}
+
+static int DUMBCALLBACK limit_xm_skip(void *f, long n)
+{ /* skip callback of limit_xm_dfs: f is the LIMITED_XM itself. Always returns 0. */
+	LIMITED_XM *lx = f;
+	lx->ptr += n; /* no bounds check; the read callbacks treat positions past the buffer as zeros */
+	return 0;
+}
+
+
+
+static int DUMBCALLBACK limit_xm_getc(void *f)
+{ /* getc callback of limit_xm_dfs: returns the next buffered byte, or 0 (never an error) once ptr is past the buffer. */
+	LIMITED_XM *lx = f;
+	if (lx->ptr >= lx->allocated) { /* NOTE(review): bounded by allocated, not limit, so bytes left over from an earlier, larger window stay readable - confirm intended */
+		return 0;
+	}
+	return lx->buffered[lx->ptr++];
+}
+
+
+
+static int32 DUMBCALLBACK limit_xm_getnc(char *ptr, int32 n, void *f)
+{ /* getnc callback of limit_xm_dfs: copies n bytes from the window into ptr, zero-filling whatever lies past the buffer. Always returns n. */
+	LIMITED_XM *lx = f;
+	int left;
+	left = lx->allocated - lx->ptr; /* bytes still available; measured against allocated, not limit */
+	if (n > left) {
+		if (left > 0) {
+			memcpy( ptr, lx->buffered + lx->ptr, left );
+			memset( ptr + left, 0, n - left );
+		} else {
+			memset( ptr, 0, n );
+		}
+	} else {
+		memcpy( ptr, lx->buffered + lx->ptr, n );
+	}
+	lx->ptr += n; /* advances by the full n even when part of it was zero-filled */
+	return n;
+}
+
+
+
+static void DUMBCALLBACK limit_xm_close(void *f)
+{ /* close callback of limit_xm_dfs: releases the window buffer and the LIMITED_XM itself. */
+	LIMITED_XM *lx = f;
+	if (lx->buffered) free(lx->buffered);
+	/* Do NOT close lx->remaining */
+	free(f);
+}
+
+
+
+/* These two can be stubs since this implementation doesn't use seeking. This one ignores its arguments and always returns 1. */
+static int DUMBCALLBACK limit_xm_seek(void *f, long n)
+{
+    (void)f;
+    (void)n;
+    return 1;
+}
+
+
+
+static long DUMBCALLBACK limit_xm_get_size(void *f)
+{ /* Stub get_size callback of limit_xm_dfs: ignores f and always reports 0. */
+    (void)f;
+    return 0;
+}
+
+
+
+DUMBFILE_SYSTEM limit_xm_dfs = { /* callback table passed to dumbfile_open_ex() by dumbfile_limit_xm() */
+	NULL, /* first slot left unset - presumably the open callback; confirm against the DUMBFILE_SYSTEM declaration */
+	&limit_xm_skip,
+	&limit_xm_getc,
+	&limit_xm_getnc,
+    &limit_xm_close,
+    &limit_xm_seek,
+    &limit_xm_get_size
+};
+
+static DUMBFILE *dumbfile_limit_xm(DUMBFILE *f)
+{
+	/* Wraps f in a DUMBFILE driven by limit_xm_dfs, starting with an empty window; f is not closed by the wrapper. Returns NULL if the state cannot be allocated, else the result of dumbfile_open_ex(). */
+	LIMITED_XM * lx = malloc(sizeof(*lx));
+	if (!lx) return NULL; /* out of memory: do not dereference the failed allocation */
+	lx->remaining = f;
+	lx->buffered = NULL;
+	lx->ptr = lx->limit = lx->allocated = 0;
+	return dumbfile_open_ex( lx, &limit_xm_dfs );
+}
+
+static int it_xm_read_instrument(IT_INSTRUMENT *instrument, XM_INSTRUMENT_EXTRA *extra, DUMBFILE *f)
+{ /* Reads one XM instrument header through the limit_xm window file f into *instrument and *extra. Returns 0 on success and non-zero (-1, or 1 in one path) on error. */
+	uint32 size, bytes_read;
+	unsigned short vol_points[24];
+	unsigned short pan_points[24];
+	int i, type;
+	const unsigned long max_size = 4 + 22 + 1 + 2 + 4 + 96 + 48 + 48 + 1 * 14 + 2 + 2; /* size of the full header as it is read below */
+	unsigned long skip_end = 0;
+
+	/* Header size. Tends to be more than the actual size of the structure.
+	 * So unread bytes must be skipped before reading the first sample
+	 * header.
+	 */
+
+	if ( limit_xm_resize( f, 4 ) < 0 ) return -1;
+	size = dumbfile_igetl(f);
+	/* A non-zero size below 4 cannot even cover the size field itself and would make the unsigned size - 4 below wrap around. */
+	if ( size == 0 ) size = max_size;
+	else if ( size < 4 ) return -1;
+	else if ( size > max_size )
+	{
+		skip_end = size - max_size; /* excess bytes are skipped on the underlying file once the header is parsed */
+		size = max_size;
+	}
+
+	if ( limit_xm_resize( f, size - 4 ) < 0 ) return -1;
+
+    dumbfile_getnc((char *)instrument->name, 22, f);
+	instrument->name[22] = 0;
+    trim_whitespace((char *)instrument->name, 22);
+	instrument->filename[0] = 0;
+	dumbfile_skip(f, 1);  /* Instrument type. Should be 0, but seems random. */
+	extra->n_samples = dumbfile_igetw(f);
+
+	if (dumbfile_error(f) || (unsigned int)extra->n_samples > XM_MAX_SAMPLES_PER_INSTRUMENT)
+		return -1;
+
+	bytes_read = 4 + 22 + 1 + 2;
+
+	if (extra->n_samples) {
+		/* sample header size */
+		/*i = dumbfile_igetl(f);
+		if (!i || i > 0x28) i = 0x28;*/
+		dumbfile_skip(f, 4);
+		i = 0x28;
+		extra->sample_header_size = i;
+
+		/* sample map */
+		for (i = 0; i < 96; i++) {
+			instrument->map_sample[i] = dumbfile_getc(f) + 1;
+			instrument->map_note[i] = i;
+		}
+
+		if (dumbfile_error(f))
+			return 1; /* NOTE(review): every other failure path returns -1; confirm how callers treat a positive value */
+
+		/* volume/panning envelopes */
+		for (i = 0; i < 24; i++)
+			vol_points[i] = dumbfile_igetw(f);
+		for (i = 0; i < 24; i++)
+			pan_points[i] = dumbfile_igetw(f);
+
+		instrument->volume_envelope.n_nodes = dumbfile_getc(f);
+		instrument->pan_envelope.n_nodes = dumbfile_getc(f);
+
+		if (dumbfile_error(f))
+			return -1;
+
+		instrument->volume_envelope.sus_loop_start = dumbfile_getc(f);
+		instrument->volume_envelope.loop_start = dumbfile_getc(f);
+		instrument->volume_envelope.loop_end = dumbfile_getc(f);
+
+		instrument->pan_envelope.sus_loop_start = dumbfile_getc(f);
+		instrument->pan_envelope.loop_start = dumbfile_getc(f);
+		instrument->pan_envelope.loop_end = dumbfile_getc(f);
+
+		/* The envelope handler for XM files won't use sus_loop_end. */
+
+		type = dumbfile_getc(f);
+		instrument->volume_envelope.flags = 0;
+		if ((type & XM_ENVELOPE_ON) && instrument->volume_envelope.n_nodes)
+			instrument->volume_envelope.flags |= IT_ENVELOPE_ON;
+		if (type & XM_ENVELOPE_LOOP)    instrument->volume_envelope.flags |= IT_ENVELOPE_LOOP_ON;
+#if 1
+		if (type & XM_ENVELOPE_SUSTAIN) instrument->volume_envelope.flags |= IT_ENVELOPE_SUSTAIN_LOOP;
+#else // This is now handled in itrender.c
+		/* let's avoid fading out when reaching the last envelope node */
+		if (!(type & XM_ENVELOPE_LOOP)) {
+			instrument->volume_envelope.loop_start = instrument->volume_envelope.n_nodes-1;
+			instrument->volume_envelope.loop_end = instrument->volume_envelope.n_nodes-1;
+		}
+		instrument->volume_envelope.flags |= IT_ENVELOPE_LOOP_ON;
+#endif
+
+		type = dumbfile_getc(f);
+		instrument->pan_envelope.flags = 0;
+		if ((type & XM_ENVELOPE_ON) && instrument->pan_envelope.n_nodes)
+			instrument->pan_envelope.flags |= IT_ENVELOPE_ON;
+		if (type & XM_ENVELOPE_LOOP)    instrument->pan_envelope.flags |= IT_ENVELOPE_LOOP_ON; // should this be here?
+		if (type & XM_ENVELOPE_SUSTAIN) instrument->pan_envelope.flags |= IT_ENVELOPE_SUSTAIN_LOOP;
+
+		if (it_xm_make_envelope(&instrument->volume_envelope, vol_points, 0) != 0) {
+			TRACE("XM error: volume envelope\n");
+			if (instrument->volume_envelope.flags & IT_ENVELOPE_ON) return -1;
+		}
+
+		if (it_xm_make_envelope(&instrument->pan_envelope, pan_points, -32) != 0) { /* pan node values are shifted down by 32 */
+			TRACE("XM error: pan envelope\n");
+			if (instrument->pan_envelope.flags & IT_ENVELOPE_ON) return -1;
+		}
+
+		instrument->pitch_envelope.flags = 0;
+
+		extra->vibrato_type = dumbfile_getc(f);
+		extra->vibrato_sweep = dumbfile_getc(f);
+		extra->vibrato_depth = dumbfile_getc(f);
+		extra->vibrato_speed = dumbfile_getc(f);
+
+		if (dumbfile_error(f) || extra->vibrato_type > 4) // XXX
+			return -1;
+
+		/** WARNING: lossy approximation */
+		instrument->fadeout = (dumbfile_igetw(f)*128 + 64)/0xFFF;
+
+		dumbfile_skip(f, 2); /* reserved */
+
+		bytes_read += 4 + 96 + 48 + 48 + 14*1 + 2 + 2;
+	} else
+		for (i = 0; i < 96; i++)
+			instrument->map_sample[i] = 0; /* no samples: nothing is mapped to any note */
+
+	if (size > bytes_read && dumbfile_skip(f, size - bytes_read)) /* skip whatever is left of the (capped) header */
+		return -1;
+
+	if (skip_end && limit_xm_skip_end(f, skip_end))
+		return -1;
+
+	instrument->new_note_action = NNA_NOTE_CUT;
+	instrument->dup_check_type = DCT_OFF;
+	instrument->dup_check_action = DCA_NOTE_CUT;
+	instrument->pp_separation = 0;
+	instrument->pp_centre = 60; /* C-5 */
+	instrument->global_volume = 128;
+	instrument->default_pan = 32;
+	instrument->random_volume = 0;
+	instrument->random_pan = 0;
+	instrument->filter_cutoff = 0;
+	instrument->filter_resonance = 0;
+
+	return 0;
+}
+
+
+
+/* I (entheh) have two XM files saved by a very naughty program. After a
+ * 16-bit sample, it saved a rogue byte. The length of the sample was indeed
+ * an odd number, incremented to include the rogue byte.
+ *
+ * In this function we are converting sample lengths and loop points so they
+ * are measured in samples. This means we forget about the extra bytes, and
+ * they don't get skipped. So we fail trying to read the next instrument.
+ *
+ * To get around this, this function returns the number of rogue bytes that
+ * won't be accounted for by reading sample->length samples. It returns a
+ * negative number on failure.
+ */
+static int it_xm_read_sample_header(IT_SAMPLE *sample, DUMBFILE *f)
+{ /* Reads one XM sample header from f into *sample. Returns the rogue byte count (4 marks the ADPCM case), or -1 on read error. */
+	int type;
+	int relative_note_number; /* relative to C4 */
+	int finetune;
+	int roguebytes;
+	int roguebytesmask;
+	int reserved;
+
+	sample->length         = dumbfile_igetl(f);
+	sample->loop_start     = dumbfile_igetl(f);
+	sample->loop_end       = sample->loop_start + dumbfile_igetl(f); /* the file stores a loop length, not an end point */
+	sample->global_volume  = 64;
+	sample->default_volume = dumbfile_getc(f);
+	finetune               = (signed char)dumbfile_getc(f); /* -128..127 <=> -1 semitone .. +127/128 of a semitone */
+	type                   = dumbfile_getc(f);
+	sample->default_pan    = dumbfile_getc(f); /* 0-255 */
+	relative_note_number   = (signed char)dumbfile_getc(f);
+
+	reserved = dumbfile_getc(f);
+
+    dumbfile_getnc((char *)sample->name, 22, f);
+	sample->name[22] = 0;
+    trim_whitespace((char *)sample->name, 22);
+
+	sample->filename[0] = 0;
+
+	if (dumbfile_error(f))
+		return -1;
+
+	sample->C5_speed = (int32)(16726.0*pow(DUMB_SEMITONE_BASE, relative_note_number) /**pow(DUMB_PITCH_BASE, )*/ );
+	sample->finetune = finetune*2;
+
+	sample->flags = IT_SAMPLE_EXISTS;
+
+	if (reserved == 0xAD &&
+		(!(type & (XM_SAMPLE_16BIT | XM_SAMPLE_STEREO))))
+	{
+		/* F U Olivier Lapicque */
+		roguebytes = 4; /* 8-bit mono with reserved == 0xAD: the sample data reader takes its ADPCM path when it sees 4 */
+		roguebytesmask = 4 << 2; /* shifted right twice below, leaving a mask of 4 */
+	}
+	else
+	{
+		roguebytes = (int)sample->length;
+		roguebytesmask = 3; /* ends up as 3 (16-bit stereo), 1 (16-bit or stereo only) or 0 (8-bit mono) */
+	}
+
+	if (type & XM_SAMPLE_16BIT)
+		sample->flags |= IT_SAMPLE_16BIT;
+	else
+		roguebytesmask >>= 1;
+
+	if (type & XM_SAMPLE_STEREO)
+		sample->flags |= IT_SAMPLE_STEREO;
+	else
+		roguebytesmask >>= 1;
+
+	roguebytes &= roguebytesmask; /* byte length modulo the frame size */
+
+	if ((unsigned int)sample->loop_start < (unsigned int)sample->loop_end) {
+		if (type & XM_SAMPLE_FORWARD_LOOP) sample->flags |= IT_SAMPLE_LOOP;
+		if (type & XM_SAMPLE_PINGPONG_LOOP) sample->flags |= IT_SAMPLE_LOOP | IT_SAMPLE_PINGPONG_LOOP;
+	}
+
+	if (sample->length <= 0)
+		sample->flags &= ~IT_SAMPLE_EXISTS;
+
+	return roguebytes;
+}
+
+/* Rescale length and loop points from bytes to samples: halved once for 16-bit
+ * data and once more for stereo. An out-of-range or empty loop is disabled. */
+static void it_xm_fixup_sample_points(IT_SAMPLE *sample)
+{
+	int shift = 0;
+	unsigned int length, start, end;
+	if (sample->flags & IT_SAMPLE_16BIT) shift++;
+	if (sample->flags & IT_SAMPLE_STEREO) shift++;
+	sample->length >>= shift;
+	sample->loop_start >>= shift;
+	sample->loop_end >>= shift;
+
+	length = (unsigned int)sample->length;
+	start  = (unsigned int)sample->loop_start;
+	end    = (unsigned int)sample->loop_end;
+	if (end > length || start >= end)
+		sample->flags &= ~IT_SAMPLE_LOOP;
+}
+
+/* Returns val unchanged on a little-endian host; on any other host returns
+ * the low 16 bits of val with their two bytes exchanged. */
+static int iswapw(int val)
+{
+	union {
+		short sv;
+		char cv[2];
+	} probe;
+	/* A smart compiler will optimize this check away. */
+	probe.sv = 1;
+	if (probe.cv[0] == 1)
+		return val;
+	probe.sv = val;
+	/* Multiply rather than shift: left-shifting a negative char is undefined. */
+	return (unsigned char)probe.cv[0] | (probe.cv[1] * 256);
+}
+
+/* Reads one sample's data from f. roguebytes is the value returned by
+ * it_xm_read_sample_header(): a non-existent sample just skips that many
+ * bytes, and the value 4 selects the 4-bit ADPCM reader. Besides raw delta
+ * data, FMOD's Ogg Vorbis samples are recognised. Returns 0 on success.
+ */
+static int it_xm_read_sample_data(IT_SAMPLE *sample, unsigned char roguebytes, DUMBFILE *f)
+{
+	int old;
+	int32 i;
+//	long truncated_size;
+	int32 datasizebytes;
+
+	if (!(sample->flags & IT_SAMPLE_EXISTS))
+		return dumbfile_skip(f, roguebytes);
+
+#if 0
+	/* let's get rid of the sample data coming after the end of the loop */
+	if ((sample->flags & IT_SAMPLE_LOOP) && sample->loop_end < sample->length && roguebytes != 4) {
+		truncated_size = sample->length - sample->loop_end;
+		sample->length = sample->loop_end;
+	} else {
+		truncated_size = 0;
+	}
+#endif
+	datasizebytes = sample->length; /* still a byte count at this point */
+
+	sample->data = malloc(datasizebytes);
+	if (!sample->data)
+		return -1;
+
+	if (roguebytes == 4)
+		return _dumb_it_read_sample_data_adpcm4(sample, f) < 0 ? -1 : 0;
+
+	dumbfile_getnc(sample->data, datasizebytes, f);
+	if (dumbfile_error(f))
+		return -1;
+
+	/* FMOD extension: Samples compressed with Ogg Vorbis. The signature test
+	 * reads bytes 4..39, so data shorter than 40 bytes must not be probed. */
+	if (datasizebytes >= 40 && !memcmp((char *)sample->data + 4, "OggS", 4) &&
+		!memcmp((char *)sample->data + 33, "vorbis", 7))
+	{
+		/* The unsigned cast keeps a top byte >= 0x80 from overflowing int. */
+		int32 outlen = (int32)(((unsigned char *)(sample->data))[0] |
+					  (((unsigned char *)(sample->data))[1] << 8) |
+					  (((unsigned char *)(sample->data))[2] << 16) |
+					  ((unsigned int)((unsigned char *)(sample->data))[3] << 24));
+		short *output;
+
+		/* Because it'll be 16-bit when we're done with it. */
+		if (!(sample->flags & IT_SAMPLE_16BIT))
+			outlen <<= 1;
+		if (sample->flags & IT_SAMPLE_STEREO)
+		{
+			/* OggMod knows nothing of stereo samples and compresses them as mono,
+			 * screwing up the second channel. (Because for whatever reason,
+			 * ModPlug delta encodes them independently, even though it presents
+			 * the sample as a double-length mono sound to other players.)
+			 */
+			outlen >>= 1;
+		}
+		output = dumb_decode_vorbis(outlen, (char *)sample->data + 4, datasizebytes - 4);
+		if (output != NULL)
+		{
+			free(sample->data);
+			sample->data = output;
+			sample->length = outlen;
+			if (sample->flags & IT_SAMPLE_STEREO)
+			{
+				/* Deferred until now so a failed decode leaves the sample as is. */
+				sample->flags &= ~IT_SAMPLE_STEREO;
+				sample->loop_start >>= 1;
+				sample->loop_end >>= 1;
+			}
+			if (!(sample->flags & IT_SAMPLE_16BIT))
+			{
+				sample->flags |= IT_SAMPLE_16BIT;
+				sample->loop_start <<= 1;
+				sample->loop_end <<= 1;
+			}
+			it_xm_fixup_sample_points(sample);
+			return 0;
+		}
+		/* Decode failed, so assume it's normal sample data that just so
+		 * happened to look like a Vorbis stream. (Not likely to happen
+		 * by coincidence!) */
+	}
+
+	it_xm_fixup_sample_points(sample);
+
+	/* sample data is stored as signed delta values */
+	old = 0;
+	if (sample->flags & IT_SAMPLE_STEREO)
+	{
+		/* Stereo samples are a ModPlug extension, so to keep compatibility with
+		 * players that don't know about it (and FastTracker 2 itself), the two
+		 * channels are not stored interleaved but rather, one after the other. */
+		int old_r = 0;
+		void *ibuffer = malloc(sample->length << ((sample->flags & IT_SAMPLE_16BIT) ? 2 : 1));
+		if (ibuffer == NULL)
+		{
+			/* No memory => ignore stereo bits at the end */
+			sample->flags &= ~IT_SAMPLE_STEREO;
+		}
+		else if (sample->flags & IT_SAMPLE_16BIT)
+		{
+			for (i = 0; i < sample->length; i++)
+			{
+				((short *)ibuffer)[i*2]   = old += iswapw(((short *)sample->data)[i]);
+				((short *)ibuffer)[i*2+1] = old_r += iswapw(((short *)sample->data)[i + sample->length]);
+			}
+		}
+		else
+		{
+			for (i = 0; i < sample->length; i++)
+			{
+				((char *)ibuffer)[i*2]   = old += ((char *)sample->data)[i];
+				((char *)ibuffer)[i*2+1] = old_r += ((char *)sample->data)[i + sample->length];
+			}
+		}
+		if (ibuffer != NULL)
+		{
+			free(sample->data);
+			sample->data = ibuffer;
+		}
+	}
+	if (!(sample->flags & IT_SAMPLE_STEREO))
+	{
+		if (sample->flags & IT_SAMPLE_16BIT)
+		{
+			for (i = 0; i < sample->length; i++)
+				((short *)sample->data)[i] = old += iswapw(((short *)sample->data)[i]);
+		}
+		else
+		{
+			for (i = 0; i < sample->length; i++)
+				((char *)sample->data)[i] = old += ((char *)sample->data)[i];
+		}
+	}
+	return 0;
+}
+
+
+
+/* "Real programmers don't document. If it was hard to write,
+ *  it should be hard to understand."
+ * (Never trust the documentation provided with a tracker.
+ *  Real files are the only truth...)
+ *
+ * Loads an XM module from f into a newly allocated DUMB_IT_SIGDATA, storing
+ * the format version word in *version. Returns NULL on any failure.
+ */
+static DUMB_IT_SIGDATA *it_xm_load_sigdata(DUMBFILE *f, int * version)
+{
+	DUMB_IT_SIGDATA *sigdata;
+	char id_text[18];
+
+	int header_size;
+	int flags;
+	int n_channels;
+	int total_samples;
+	int i, j;
+
+	/* check ID text */
+	if (dumbfile_getnc(id_text, 17, f) < 17)
+		return NULL;
+	id_text[17] = 0;
+	if (strcmp(id_text, "Extended Module: ") != 0) {
+		TRACE("XM error: Not an Extended Module\n");
+		return NULL;
+	}
+
+	sigdata = malloc(sizeof(*sigdata));
+	if (!sigdata)
+		return NULL;
+
+	/* song name */
+    if (dumbfile_getnc((char *)sigdata->name, 20, f) < 20) {
+		free(sigdata);
+		return NULL;
+	}
+	sigdata->name[20] = 0;
+    trim_whitespace((char *)sigdata->name, 20);
+
+	if (dumbfile_getc(f) != 0x1A) {
+		TRACE("XM error: 0x1A not found\n");
+		free(sigdata);
+		return NULL;
+	}
+
+	/* tracker name */
+	if (dumbfile_skip(f, 20)) {
+		free(sigdata);
+		return NULL;
+	}
+
+	/* version number */
+	* version = dumbfile_igetw(f);
+	if (* version > 0x0104 || * version < 0x0102) {
+		TRACE("XM error: wrong format version\n");
+		free(sigdata);
+		return NULL;
+	}
+
+	/*
+		------------------
+		---   Header   ---
+		------------------
+	*/
+
+	/* header size */
+	header_size = dumbfile_igetl(f);
+	if (header_size < (4 + 2*8 + 1) || header_size > 0x114) { /* room for 1..256 order bytes */
+		TRACE("XM error: unexpected header size\n");
+		free(sigdata);
+		return NULL;
+	}
+
+	/* From here on every pointer member is valid for _dumb_it_unload_sigdata(). */
+	sigdata->song_message = NULL;
+	sigdata->order = NULL;
+	sigdata->instrument = NULL;
+	sigdata->sample = NULL;
+	sigdata->pattern = NULL;
+	sigdata->midi = NULL;
+	sigdata->checkpoint = NULL;
+
+	sigdata->n_samples        = 0;
+	sigdata->n_orders         = dumbfile_igetw(f);
+	sigdata->restart_position = dumbfile_igetw(f);
+	n_channels                = dumbfile_igetw(f); /* max 32 but we'll be lenient */
+	sigdata->n_pchannels      = n_channels;
+	sigdata->n_patterns       = dumbfile_igetw(f);
+	sigdata->n_instruments    = dumbfile_igetw(f); /* max 128 */ /* XXX upped to 256 */
+	flags                     = dumbfile_igetw(f);
+	sigdata->speed            = dumbfile_igetw(f);
+	if (sigdata->speed == 0) sigdata->speed = 6; // Should we? What about tempo?
+	sigdata->tempo            = dumbfile_igetw(f);
+
+    // FT2 always clips restart position against the song length
+    if (sigdata->restart_position > sigdata->n_orders)
+        sigdata->restart_position = sigdata->n_orders;
+    // And FT2 starts playback on order 0, regardless of length,
+    // and only checks if the next order is greater than or equal
+    // to this, not the current pattern. Work around this with
+    // DUMB's playback core by overriding a zero length with one.
+    if (sigdata->n_orders == 0)
+        sigdata->n_orders = 1;
+
+	/* sanity checks */
+	// XXX
+	i = header_size - 4 - 2 * 8; /* Maximum number of orders expected */
+	if (dumbfile_error(f) || sigdata->n_orders <= 0 || sigdata->n_orders > i || sigdata->n_patterns > 256 || sigdata->n_instruments > 256 || n_channels > DUMB_IT_N_CHANNELS) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	//if (sigdata->restart_position >= sigdata->n_orders)
+		//sigdata->restart_position = 0;
+
+	/* order table */
+	sigdata->order = malloc(sigdata->n_orders*sizeof(*sigdata->order));
+	if (!sigdata->order) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+    dumbfile_getnc((char *)sigdata->order, sigdata->n_orders, f);
+	dumbfile_skip(f, i - sigdata->n_orders); /* rest of the header's order area */
+
+	if (dumbfile_error(f)) {
+		_dumb_it_unload_sigdata(sigdata);
+		return NULL;
+	}
+
+	if ( * version > 0x103 ) {
+		/*
+			--------------------
+			---   Patterns   ---
+			--------------------
+		*/
+
+		sigdata->pattern = malloc(sigdata->n_patterns * sizeof(*sigdata->pattern));
+		if (!sigdata->pattern) {
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+		for (i = 0; i < sigdata->n_patterns; i++)
+			sigdata->pattern[i].entry = NULL;
+
+		{
+			unsigned char *buffer = malloc(1280 * n_channels); /* 256 rows * 5 bytes */
+			if (!buffer) {
+				_dumb_it_unload_sigdata(sigdata);
+				return NULL;
+			}
+			for (i = 0; i < sigdata->n_patterns; i++) {
+				if (it_xm_read_pattern(&sigdata->pattern[i], f, n_channels, buffer, * version) != 0) {
+					free(buffer);
+					_dumb_it_unload_sigdata(sigdata);
+					return NULL;
+				}
+			}
+			free(buffer);
+		}
+
+		/*
+		-----------------------------------
+		---   Instruments and Samples   ---
+		-----------------------------------
+		*/
+
+		sigdata->instrument = malloc(sigdata->n_instruments * sizeof(*sigdata->instrument));
+		if (!sigdata->instrument) {
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+
+		/* With XM, samples are not global, they're part of an instrument. In a
+		* file, each instrument is stored with its samples. Because of this, I
+		* don't know how to find how many samples are present in the file. Thus
+		* I have to do n_instruments reallocation on sigdata->sample.
+		* Looking at FT2, it doesn't seem possible to have more than 16 samples
+		* per instrument (even though n_samples is stored as 2 bytes). So maybe
+		* we could allocate a 128*16 array of samples, and shrink it back to the
+		* correct size when we know it?
+		* Alternatively, I could allocate samples by blocks of N (still O(n)),
+		* or double the number of allocated samples when I need more (O(log n)).
+		*/
+		total_samples = 0;
+		sigdata->sample = NULL;
+
+		for (i = 0; i < sigdata->n_instruments; i++) {
+			XM_INSTRUMENT_EXTRA extra;
+
+			DUMBFILE * lf = dumbfile_limit_xm( f );
+			if ( !lf ) {
+				_dumb_it_unload_sigdata(sigdata);
+				return NULL;
+			}
+
+			if (it_xm_read_instrument(&sigdata->instrument[i], &extra, lf) < 0) {
+				// XXX
+				if ( ! i )
+				{
+					TRACE("XM error: instrument %d\n", i+1);
+					dumbfile_close( lf );
+					_dumb_it_unload_sigdata(sigdata);
+					return NULL;
+				}
+				else
+				{
+					/* A bad instrument after the first just ends the list early. */
+					dumbfile_close( lf );
+					sigdata->n_instruments = i;
+					break;
+				}
+			}
+
+			if (extra.n_samples) {
+				unsigned char roguebytes[XM_MAX_SAMPLES_PER_INSTRUMENT];
+
+				/* adjust instrument sample map (make indices absolute) */
+				for (j = 0; j < 96; j++)
+					sigdata->instrument[i].map_sample[j] += total_samples;
+
+				sigdata->sample = safe_realloc(sigdata->sample, sizeof(*sigdata->sample)*(total_samples+extra.n_samples));
+				if (!sigdata->sample) {
+					dumbfile_close( lf );
+					_dumb_it_unload_sigdata(sigdata);
+					return NULL;
+				}
+				for (j = total_samples; j < total_samples+extra.n_samples; j++)
+					sigdata->sample[j].data = NULL;
+
+				if ( limit_xm_resize( lf, 0 ) < 0 ) {
+					dumbfile_close( lf );
+					_dumb_it_unload_sigdata( sigdata );
+					return NULL;
+				}
+
+				/* read instrument's samples */
+				for (j = 0; j < extra.n_samples; j++) {
+					IT_SAMPLE *sample = &sigdata->sample[total_samples+j];
+					int b;
+					if ( limit_xm_resize( lf, extra.sample_header_size ) < 0 ) {
+						dumbfile_close( lf );
+						_dumb_it_unload_sigdata( sigdata );
+						return NULL;
+					}
+					b = it_xm_read_sample_header(sample, lf);
+					if (b < 0) {
+						dumbfile_close( lf );
+						_dumb_it_unload_sigdata(sigdata);
+						return NULL;
+					}
+					roguebytes[j] = b;
+					// Any reason why these can't be set inside it_xm_read_sample_header()?
+					sample->vibrato_speed = extra.vibrato_speed;
+					sample->vibrato_depth = extra.vibrato_depth;
+					sample->vibrato_rate = extra.vibrato_sweep;
+					/* Rate and sweep don't match, but the difference is
+					 * accounted for in itrender.c. */
+					sample->vibrato_waveform = xm_convert_vibrato[extra.vibrato_type];
+					sample->max_resampling_quality = -1;
+				}
+				for (j = 0; j < extra.n_samples; j++) {
+					if (it_xm_read_sample_data(&sigdata->sample[total_samples+j], roguebytes[j], f) != 0) {
+						dumbfile_close( lf );
+						_dumb_it_unload_sigdata(sigdata);
+						return NULL;
+					}
+				}
+				total_samples += extra.n_samples;
+			}
+
+			dumbfile_close( lf );
+		}
+
+		sigdata->n_samples = total_samples;
+	} else {
+		// ahboy! old layout!
+		// first instruments and sample headers, then patterns, then sample data!
+
+		/*
+		-----------------------------------
+		---   Instruments and Samples   ---
+		-----------------------------------
+		*/
+
+		/* One rogue-byte count per sample, kept until the data is read at the end. */
+		unsigned char * roguebytes = malloc( sigdata->n_instruments * XM_MAX_SAMPLES_PER_INSTRUMENT );
+		if (!roguebytes) {
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+
+		sigdata->instrument = malloc(sigdata->n_instruments * sizeof(*sigdata->instrument));
+		if (!sigdata->instrument) {
+			free(roguebytes);
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+
+		total_samples = 0;
+		sigdata->sample = NULL;
+
+		for (i = 0; i < sigdata->n_instruments; i++) {
+			XM_INSTRUMENT_EXTRA extra;
+
+			DUMBFILE * lf = dumbfile_limit_xm( f );
+			if ( !lf ) {
+				free(roguebytes);
+				_dumb_it_unload_sigdata(sigdata);
+				return NULL;
+			}
+
+			if (it_xm_read_instrument(&sigdata->instrument[i], &extra, lf) < 0) {
+				TRACE("XM error: instrument %d\n", i+1);
+				dumbfile_close(lf);
+				free(roguebytes);
+				_dumb_it_unload_sigdata(sigdata);
+				return NULL;
+			}
+
+			if (extra.n_samples) {
+				/* adjust instrument sample map (make indices absolute) */
+				for (j = 0; j < 96; j++)
+					sigdata->instrument[i].map_sample[j] += total_samples;
+
+				sigdata->sample = safe_realloc(sigdata->sample, sizeof(*sigdata->sample)*(total_samples+extra.n_samples));
+				if (!sigdata->sample) {
+					dumbfile_close( lf );
+					free(roguebytes);
+					_dumb_it_unload_sigdata(sigdata);
+					return NULL;
+				}
+				for (j = total_samples; j < total_samples+extra.n_samples; j++)
+					sigdata->sample[j].data = NULL;
+
+				if ( limit_xm_resize( lf, 0 ) < 0 ) {
+					dumbfile_close( lf );
+					free( roguebytes );
+					_dumb_it_unload_sigdata( sigdata );
+					return NULL;
+				}
+
+				/* read instrument's samples */
+				for (j = 0; j < extra.n_samples; j++) {
+					IT_SAMPLE *sample = &sigdata->sample[total_samples+j];
+					int b;
+					if ( limit_xm_resize( lf, extra.sample_header_size ) < 0 ) {
+							dumbfile_close( lf );
+							free( roguebytes );
+							_dumb_it_unload_sigdata( sigdata );
+							return NULL;
+					}
+					b = it_xm_read_sample_header(sample, lf);
+					if (b < 0) {
+						dumbfile_close( lf ); /* closed on every other exit from this loop too */
+						free(roguebytes);
+						_dumb_it_unload_sigdata(sigdata);
+						return NULL;
+					}
+					roguebytes[total_samples+j] = b;
+					// Any reason why these can't be set inside it_xm_read_sample_header()?
+					sample->vibrato_speed = extra.vibrato_speed;
+					sample->vibrato_depth = extra.vibrato_depth;
+					sample->vibrato_rate = extra.vibrato_sweep;
+					/* Rate and sweep don't match, but the difference is
+					 * accounted for in itrender.c. */
+					sample->vibrato_waveform = xm_convert_vibrato[extra.vibrato_type];
+					sample->max_resampling_quality = -1;
+				}
+				total_samples += extra.n_samples;
+			}
+
+			dumbfile_close( lf );
+		}
+
+		sigdata->n_samples = total_samples;
+
+		/*
+			--------------------
+			---   Patterns   ---
+			--------------------
+		*/
+
+		sigdata->pattern = malloc(sigdata->n_patterns * sizeof(*sigdata->pattern));
+		if (!sigdata->pattern) {
+			free(roguebytes);
+			_dumb_it_unload_sigdata(sigdata);
+			return NULL;
+		}
+		for (i = 0; i < sigdata->n_patterns; i++)
+			sigdata->pattern[i].entry = NULL;
+
+		{
+			unsigned char *buffer = malloc(1280 * n_channels); /* 256 rows * 5 bytes */
+			if (!buffer) {
+				free(roguebytes);
+				_dumb_it_unload_sigdata(sigdata);
+				return NULL;
+			}
+			for (i = 0; i < sigdata->n_patterns; i++) {
+				if (it_xm_read_pattern(&sigdata->pattern[i], f, n_channels, buffer, * version) != 0) {
+					free(buffer);
+					free(roguebytes);
+					_dumb_it_unload_sigdata(sigdata);
+					return NULL;
+				}
+			}
+			free(buffer);
+		}
+
+		// and now we load the sample data
+		for (j = 0; j < total_samples; j++) {
+			if (it_xm_read_sample_data(&sigdata->sample[j], roguebytes[j], f) != 0) {
+				free(roguebytes);
+				_dumb_it_unload_sigdata(sigdata);
+				return NULL;
+			}
+		}
+
+		free(roguebytes);
+	}
+
+	sigdata->flags = IT_WAS_AN_XM | IT_OLD_EFFECTS | IT_COMPATIBLE_GXX | IT_STEREO | IT_USE_INSTRUMENTS;
+	// Are we OK with IT_COMPATIBLE_GXX off?
+	//
+	// When specifying note + instr + tone portamento, and an old note is still playing (even after note off):
+	// - If Compatible Gxx is on, the new note will be triggered only if the instrument _changes_.
+	// - If Compatible Gxx is off, the new note will always be triggered, provided the instrument is specified.
+	// - FT2 seems to do the latter (unconfirmed).
+
+	// Err, wait. XM playback has its own code. The change made to the IT
+	// playbackc code didn't affect XM playback. Forget this then. There's
+	// still a bug in XM playback though, and it'll need some investigation...
+	// tomorrow...
+
+	// UPDATE: IT_COMPATIBLE_GXX is required to be on, so that tone porta has
+	// separate memory from portamento.
+
+	if (flags & XM_LINEAR_FREQUENCY)
+		sigdata->flags |= IT_LINEAR_SLIDES;
+
+	sigdata->global_volume = 128;
+	sigdata->mixing_volume = 48;
+	sigdata->pan_separation = 128;
+
+	memset(sigdata->channel_volume, 64, DUMB_IT_N_CHANNELS);
+	memset(sigdata->channel_pan, 32, DUMB_IT_N_CHANNELS);
+
+	_dumb_it_fix_invalid_orders(sigdata);
+
+	return sigdata;
+}
+
+
+
+#if 0 /* disabled: it_compute_length() below is never compiled */
+
+/* The length returned is the time required to play from the beginning of the
+ * file to the last row of the last order (which is when the player will
+ * loop). Depending on the song, the sound might stop sooner.
+ * Due to fixed point roundoffs, I think this is only reliable to the second.
+ * Full precision could be achieved by using a double during the computation,
+ * or maybe a LONG_LONG.
+ */
+int32 it_compute_length(const DUMB_IT_SIGDATA *sigdata)
+{
+	IT_PATTERN *pattern;
+	int tempo, speed;
+	int loop_start[IT_N_CHANNELS];
+	char loop_count[IT_N_CHANNELS];
+	int order, entry;
+	int row_first_entry = 0;
+	int jump, jump_dest;
+	int delay, fine_delay;
+	int i;
+	int32 t; /* accumulated time, returned at the end */
+
+	if (!sigdata)
+		return 0;
+
+	tempo = sigdata->tempo;
+	speed = sigdata->speed;
+	order = entry = 0;
+	jump = jump_dest = 0;
+	t = 0;
+
+	/* for each PATTERN */
+	for (order = 0; order < sigdata->n_orders; order++) {
+
+		if (sigdata->order[order] == IT_ORDER_END) break;
+		if (sigdata->order[order] == IT_ORDER_SKIP) continue;
+
+		for (i = 0; i < IT_N_CHANNELS; i++)
+			loop_count[i] = -1; /* -1: no pattern loop counting on this channel yet */
+
+		pattern = &sigdata->pattern[ sigdata->order[order] ];
+		entry = 0;
+		/* A break-to-row left over from the previous pattern: skip entries
+		 * until jump_dest end-of-row markers have been passed. */
+		if (jump == IT_BREAK_TO_ROW) {
+			int row = 0;
+			while (row < jump_dest)
+				if (pattern->entry[entry++].channel >= IT_N_CHANNELS)
+					row++;
+		}
+
+		/* for each ROW */
+		while (entry < pattern->n_entries) {
+			row_first_entry = entry;
+			delay = fine_delay = 0;
+			jump = 0;
+
+			/* for each note NOTE */
+			while (entry < pattern->n_entries && pattern->entry[entry].channel < IT_N_CHANNELS) {
+				int value   = pattern->entry[entry].effectvalue;
+				int channel = pattern->entry[entry].channel;
+
+				switch (pattern->entry[entry].effect) {
+
+					case IT_SET_SPEED: speed = value; break;
+
+					case IT_JUMP_TO_ORDER:
+						if (value <= order) /* infinite loop */
+							return 0;
+						jump = IT_JUMP_TO_ORDER;
+						jump_dest = value;
+						break;
+
+					case IT_BREAK_TO_ROW:
+						jump = IT_BREAK_TO_ROW;
+						jump_dest = value;
+						break;
+
+					case IT_S:
+						switch (HIGH(value)) {
+							case IT_S_PATTERN_DELAY:      delay      = LOW(value); break;
+							case IT_S_FINE_PATTERN_DELAY: fine_delay = LOW(value); break;
+							case IT_S_PATTERN_LOOP:
+								if (LOW(value) == 0) {
+									loop_start[channel] = row_first_entry;
+								} else {
+									if (loop_count[channel] == -1)
+										loop_count[channel] = LOW(value);
+
+									if (loop_count[channel]) {
+										jump = IT_S_PATTERN_LOOP;
+										jump_dest = loop_start[channel];
+									}
+									loop_count[channel]--;
+								}
+								break;
+						}
+						break;
+
+					case IT_SET_SONG_TEMPO:
+						switch (HIGH(value)) { /* slides happen every non-row frames */
+							case 0:  tempo = tempo - LOW(value)*(speed-1); break;
+							case 1:  tempo = tempo + LOW(value)*(speed-1); break;
+							default: tempo = value;
+						}
+						tempo = MID(32, tempo, 255);
+						break;
+				}
+
+				entry++;
+			}
+
+			/* end of ROW */
+			entry++; /* step over the end-of-row marker entry */
+			t += TICK_TIME_DIVIDEND * (speed*(1+delay) + fine_delay) / tempo;
+
+			if (jump == IT_JUMP_TO_ORDER) {
+				order = jump_dest - 1; /* the for loop's order++ lands on jump_dest */
+				break;
+			} else if (jump == IT_BREAK_TO_ROW)
+				break;
+			else if (jump == IT_S_PATTERN_LOOP)
+				entry = jump_dest - 1;
+		}
+
+		/* end of PATTERN */
+	}
+
+	return t;
+}
+
+#endif /* 0 */
+
+
+/* Map a value in 0..15 to its uppercase hexadecimal digit character. */
+static char hexdigit(int in)
+{
+	return (in < 10) ? (in + '0') : (in + 'A' - 10);
+}
+
+/* Loads an XM module from f and wraps it in a DUH tagged with the song name
+ * ("TITLE") and a version string of the form "XM vH.HH" ("FORMAT").
+ * Returns NULL if the module cannot be loaded. */
+DUH *DUMBEXPORT dumb_read_xm_quick(DUMBFILE *f)
+{
+	DUH_SIGTYPE_DESC *descptr = &_dumb_sigtype_it;
+	sigdata_t *sigdata;
+	const char *tag[2][2];
+	char version[16];
+	int ver;
+
+	sigdata = it_xm_load_sigdata(f, &ver);
+	if (!sigdata)
+		return NULL;
+
+	/* Three hex nibbles of the version word: one, a dot, then two. */
+	version[0] = 'X';
+	version[1] = 'M';
+	version[2] = ' ';
+	version[3] = 'v';
+	version[4] = hexdigit((ver >> 8) & 15);
+	version[5] = '.';
+	version[6] = hexdigit((ver >> 4) & 15);
+	version[7] = hexdigit(ver & 15);
+	version[8] = 0;
+	tag[0][0] = "TITLE";
+	tag[0][1] = (const char *)(((DUMB_IT_SIGDATA *)sigdata)->name);
+	tag[1][0] = "FORMAT";
+	tag[1][1] = version;
+	return make_duh(-1, 2, (const char *const (*)[2])tag, 1, &descptr, &sigdata);
+}
diff --git a/libraries/dumb/src/it/readxm2.c b/libraries/dumb/src/it/readxm2.c
new file mode 100644
index 000000000..7a721d852
--- /dev/null
+++ b/libraries/dumb/src/it/readxm2.c
@@ -0,0 +1,29 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * readxm2.c - Function to read a Fast Tracker II     / / \  \
+ *             module from an open file and do an    | <  /   \_
+ *             initial run-through.                  |  \/ /\   /
+ *                                                    \_  /  > /
+ * Split off from readxm.c by entheh.                   | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+#include "dumb.h"
+
+
+
+DUH *DUMBEXPORT dumb_read_xm(DUMBFILE *f)
+{
+	DUH *module = dumb_read_xm_quick(f); /* load only; result is passed on as is */
+	dumb_it_do_initial_runthrough(module); /* then the initial run-through */
+	return module;
+}
diff --git a/libraries/dumb/src/it/xmeffect.c b/libraries/dumb/src/it/xmeffect.c
new file mode 100644
index 000000000..96cf7da67
--- /dev/null
+++ b/libraries/dumb/src/it/xmeffect.c
@@ -0,0 +1,245 @@
+/*  _______         ____    __         ___    ___
+ * \    _  \       \    /  \  /       \   \  /   /       '   '  '
+ *  |  | \  \       |  |    ||         |   \/   |         .      .
+ *  |  |  |  |      |  |    ||         ||\  /|  |
+ *  |  |  |  |      |  |    ||         || \/ |  |         '  '  '
+ *  |  |  |  |      |  |    ||         ||    |  |         .      .
+ *  |  |_/  /        \  \__//          ||    |  |
+ * /_______/ynamic    \____/niversal  /__\  /____\usic   /|  .  . ibliotheque
+ *                                                      /  \
+ *                                                     / .  \
+ * xmeffect.c - Code for converting MOD/XM            / / \  \
+ *              effects to IT effects.               | <  /   \_
+ *                                                   |  \/ /\   /
+ * By Julien Cugniere. Ripped out of readxm.c         \_  /  > /
+ * by entheh.                                           | \ / /
+ *                                                      |  ' /
+ *                                                       \__/
+ */
+
+
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "dumb.h"
+#include "internal/it.h"
+
+#if 0 /* disabled: nothing down to the matching #endif is compiled */
+unsigned char **_dumb_malloc2(int w, int h) /* zero-filled table of h rows, w bytes each */
+{
+	unsigned char **line =  malloc(h * sizeof(*line)); /* the row pointers */
+	int i;
+	if (!line) return NULL;
+
+	line[0] = malloc(w * h * sizeof(*line[0])); /* one block holding every row */
+	if (!line[0]) {
+		free(line);
+		return NULL;
+	}
+
+	for (i = 1; i < h; i++)
+		line[i] = line[i-1] + w; /* each row starts w bytes after the previous one */
+
+	memset(line[0], 0, w*h);
+
+	return line;
+}
+
+
+
+void _dumb_free2(unsigned char **line) /* releases a table made by _dumb_malloc2(); NULL is accepted */
+{
+	if (line) {
+		if (line[0])
+			free(line[0]); /* the shared data block */
+		free(line); /* then the row pointers */
+	}
+}
+
+
+
+/* Effects having a memory. 2 means that the two parts of the effectvalue
+ * should be handled separately.
+ */
+static const char xm_has_memory[] = {
+/*	0  1  2  3  4  5  6  7  8  9  A  B  C  D (E) F  G  H        K  L           P     R     T          (X) */
+	0, 1, 1, 1, 2, 1, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
+
+/*  E0 E1 E2 E3 E4 E5 E6 E7    E9 EA EB EC ED EE         X1 X2 */
+	0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,   0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+#endif
+
+
+
+/* Converts one MOD/XM effect and its value into entry's IT effect fields. Effects marked with 'special' are handled specifically in itrender.c */
+void _dumb_it_xm_convert_effect(int effect, int value, IT_ENTRY *entry, int mod)
+{
+const int log = 0; /* debug switch: constant 0 keeps the printf tracing below silent */
+
+	/* Nothing to convert: an all-zero effect/value pair, or an effect number out of range. */
+	if ((!effect && !value) || (effect >= XM_N_EFFECTS))
+		return;
+
+if (log) printf("%c%02X", (effect<10)?('0'+effect):('A'+effect-10), value);
+
+	/* Linearisation of the effect number... */
+	if (effect == XM_E) {
+		effect = EBASE + HIGH(value); /* Exy: the high nibble picks the sub-effect */
+		value = LOW(value);
+	} else if (effect == XM_X) {
+		effect = XBASE + HIGH(value); /* Xxy: likewise */
+		value = LOW(value);
+	}
+
+if (log) printf(" - %2d %02X", effect, value);
+
+#if 0 // This should be handled in itrender.c!
+	/* update effect memory */
+	switch (xm_has_memory[effect]) {
+		case 1:
+			if (!value)
+				value = memory[entry->channel][effect];
+			else
+				memory[entry->channel][effect] = value;
+			break;
+
+		case 2:
+			if (!HIGH(value))
+				SET_HIGH(value, HIGH(memory[entry->channel][effect]));
+			else
+				SET_HIGH(memory[entry->channel][effect], HIGH(value));
+
+			if (!LOW(value))
+				SET_LOW(value, LOW(memory[entry->channel][effect]));
+			else
+				SET_LOW(memory[entry->channel][effect], LOW(value));
+			break;
+	}
+#endif
+
+	/* convert effect */
+	entry->mask |= IT_ENTRY_EFFECT; /* set up front; the default case below clears it again */
+	switch (effect) {
+
+		case XM_APPREGIO:           effect = IT_ARPEGGIO;           break;
+		case XM_VIBRATO:            effect = IT_VIBRATO;            break;
+		case XM_TONE_PORTAMENTO:    effect = IT_TONE_PORTAMENTO;    break;
+		case XM_TREMOLO:            effect = IT_TREMOLO;            break;
+		case XM_SET_PANNING:        effect = IT_SET_PANNING;        break;
+		case XM_SAMPLE_OFFSET:      effect = IT_SET_SAMPLE_OFFSET;  break;
+		case XM_POSITION_JUMP:      effect = IT_JUMP_TO_ORDER;      break;
+		case XM_MULTI_RETRIG:       effect = IT_RETRIGGER_NOTE;     break;
+		case XM_TREMOR:             effect = IT_TREMOR;             break;
+		case XM_PORTAMENTO_UP:      effect = IT_XM_PORTAMENTO_UP;   break;
+		case XM_PORTAMENTO_DOWN:    effect = IT_XM_PORTAMENTO_DOWN; break;
+		case XM_SET_CHANNEL_VOLUME: effect = IT_SET_CHANNEL_VOLUME; break; /* special */
+		case XM_VOLSLIDE_TONEPORTA: effect = IT_VOLSLIDE_TONEPORTA; break; /* special */
+		case XM_VOLSLIDE_VIBRATO:   effect = IT_VOLSLIDE_VIBRATO;   break; /* special */
+
+		case XM_PATTERN_BREAK:
+			effect = IT_BREAK_TO_ROW;
+			value = BCD_TO_NORMAL(value);
+			if (value > 63) value = 0; /* FT2, maybe ProTracker? */
+			break;
+
+		case XM_VOLUME_SLIDE: /* special */
+			effect = IT_VOLUME_SLIDE;
+			value = HIGH(value) ? EFFECT_VALUE(HIGH(value), 0) : EFFECT_VALUE(0, LOW(value)); /* a non-zero high nibble wins; the low one is dropped */
+			break;
+
+		case XM_PANNING_SLIDE:
+			effect = IT_PANNING_SLIDE;
+			//value = HIGH(value) ? EFFECT_VALUE(HIGH(value), 0) : EFFECT_VALUE(0, LOW(value));
+			value = HIGH(value) ? EFFECT_VALUE(0, HIGH(value)) : EFFECT_VALUE(LOW(value), 0); /* as above, but the nibbles trade places */
+			break;
+
+		case XM_GLOBAL_VOLUME_SLIDE: /* special */
+			effect = IT_GLOBAL_VOLUME_SLIDE;
+			value = HIGH(value) ? EFFECT_VALUE(HIGH(value), 0) : EFFECT_VALUE(0, LOW(value));
+			break;
+
+		case XM_SET_TEMPO_BPM:
+			/* Small values set the speed, larger ones the tempo; 0x20 itself counts as a speed only when mod is non-zero. */
+			if (mod) effect = (value <= 0x20) ? (IT_SET_SPEED) : (IT_SET_SONG_TEMPO);
+			else effect = (value < 0x20) ? (IT_SET_SPEED) : (IT_SET_SONG_TEMPO);
+			break;
+
+		case XM_SET_GLOBAL_VOLUME:
+			effect = IT_SET_GLOBAL_VOLUME;
+			value *= 2;
+			if (value > 128) value = 128; /* doubled value is capped at 128 */
+			break;
+
+		case XM_KEY_OFF:
+			effect = IT_XM_KEY_OFF;
+			break;
+
+		case XM_SET_ENVELOPE_POSITION:
+			effect = IT_XM_SET_ENVELOPE_POSITION;
+			break;
+
+		case EBASE+XM_E_SET_FILTER:            effect = SBASE+IT_S_SET_FILTER;            break;
+		case EBASE+XM_E_SET_GLISSANDO_CONTROL: effect = SBASE+IT_S_SET_GLISSANDO_CONTROL; break; /** TODO */
+		case EBASE+XM_E_SET_FINETUNE:          effect = SBASE+IT_S_FINETUNE;              break;
+		case EBASE+XM_E_SET_LOOP:              effect = SBASE+IT_S_PATTERN_LOOP;          break;
+		case EBASE+XM_E_NOTE_CUT:              effect = SBASE+IT_S_DELAYED_NOTE_CUT;      break;
+		case EBASE+XM_E_NOTE_DELAY:            effect = SBASE+IT_S_NOTE_DELAY;            break;
+		case EBASE+XM_E_PATTERN_DELAY:         effect = SBASE+IT_S_PATTERN_DELAY;         break;
+		case EBASE+XM_E_SET_PANNING:           effect = SBASE+IT_S_SET_PAN;               break;
+		case EBASE+XM_E_FINE_VOLSLIDE_UP:      effect = IT_XM_FINE_VOLSLIDE_UP;           break;
+		case EBASE+XM_E_FINE_VOLSLIDE_DOWN:    effect = IT_XM_FINE_VOLSLIDE_DOWN;         break;
+		case EBASE+XM_E_SET_MIDI_MACRO:        effect = SBASE+IT_S_SET_MIDI_MACRO;        break;
+
+		case EBASE + XM_E_FINE_PORTA_UP:
+			effect = IT_PORTAMENTO_UP;
+			value = EFFECT_VALUE(0xF, value); /* fine slides get 0xF as the high part */
+			break;
+
+		case EBASE + XM_E_FINE_PORTA_DOWN:
+			effect = IT_PORTAMENTO_DOWN;
+			value = EFFECT_VALUE(0xF, value);
+			break;
+
+		case EBASE + XM_E_RETRIG_NOTE:
+			effect = IT_XM_RETRIGGER_NOTE;
+			value = EFFECT_VALUE(0, value);
+			break;
+
+		case EBASE + XM_E_SET_VIBRATO_CONTROL:
+			effect = SBASE+IT_S_SET_VIBRATO_WAVEFORM;
+			value &= ~4; /* bit 2 of the waveform value is dropped */
+			break;
+
+		case EBASE + XM_E_SET_TREMOLO_CONTROL:
+			effect = SBASE+IT_S_SET_TREMOLO_WAVEFORM;
+			value &= ~4; /* bit 2 of the waveform value is dropped */
+			break;
+
+		case XBASE + XM_X_EXTRAFINE_PORTA_UP:
+			effect = IT_PORTAMENTO_UP;
+			value = EFFECT_VALUE(0xE, value); /* extra-fine slides get 0xE as the high part */
+			break;
+
+		case XBASE + XM_X_EXTRAFINE_PORTA_DOWN:
+			effect = IT_PORTAMENTO_DOWN;
+			value = EFFECT_VALUE(0xE, value);
+			break;
+
+		default:
+			/* user effect (often used in demos for synchronisation) */
+			entry->mask &= ~IT_ENTRY_EFFECT;
+	}
+
+if (log) printf(" - %2d %02X", effect, value);
+
+	/* Inverse linearisation... */
+	if (effect >= SBASE && effect < SBASE+16) {
+		value = EFFECT_VALUE(effect-SBASE, value); /* the sub-effect number becomes the high part again */
+		effect = IT_S;
+	}
+
+if (log) printf(" - %c%02X\n", 'A'+effect-1, value);
+
+	/* Stored even when the mask bit was cleared by the default case above. */
+	entry->effect = effect;
+	entry->effectvalue = value;
+}
diff --git a/libraries/dumb/vc6/dumb/.gitignore b/libraries/dumb/vc6/dumb/.gitignore
new file mode 100644
index 000000000..a5aab370c
--- /dev/null
+++ b/libraries/dumb/vc6/dumb/.gitignore
@@ -0,0 +1,3 @@
+*.user
+Debug
+Release
\ No newline at end of file
diff --git a/libraries/dumb/vc6/dumb/dumb.vcxproj b/libraries/dumb/vc6/dumb/dumb.vcxproj
new file mode 100644
index 000000000..bc10c9a67
--- /dev/null
+++ b/libraries/dumb/vc6/dumb/dumb.vcxproj
@@ -0,0 +1,216 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{612D360C-A51B-4B34-8F49-33F42A2957F5}</ProjectGuid>
+    <RootNamespace>dumb</RootNamespace>
+    <SccProjectName>
+    </SccProjectName>
+    <SccLocalPath>
+    </SccLocalPath>
+    <SccProvider>
+    </SccProvider>
+    <SccAuxPath>
+    </SccAuxPath>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120_xp</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v120_xp</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.21006.1</_ProjectFileVersion>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_USE_SSE;RESAMPLER_DECORATE=dumb;BARRAY_DECORATE=dumb;_DEBUG;WIN32;_LIB;DUMB_DECLARE_DEPRECATED;DEBUGMODE=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <AdditionalIncludeDirectories>../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_USE_SSE;RESAMPLER_DECORATE=dumb;BARRAY_DECORATE=dumb;NDEBUG;WIN32;_LIB;DUMB_DECLARE_DEPRECATED;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <FloatingPointModel>Fast</FloatingPointModel>
+      <EnableEnhancedInstructionSet>NoExtensions</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\core\atexit.c" />
+    <ClCompile Include="..\..\src\core\duhlen.c" />
+    <ClCompile Include="..\..\src\core\duhtag.c" />
+    <ClCompile Include="..\..\src\core\dumbfile.c" />
+    <ClCompile Include="..\..\src\core\loadduh.c" />
+    <ClCompile Include="..\..\src\core\makeduh.c" />
+    <ClCompile Include="..\..\src\core\rawsig.c" />
+    <ClCompile Include="..\..\src\core\readduh.c" />
+    <ClCompile Include="..\..\src\core\register.c" />
+    <ClCompile Include="..\..\src\core\rendduh.c" />
+    <ClCompile Include="..\..\src\core\rendsig.c" />
+    <ClCompile Include="..\..\src\core\unload.c" />
+    <ClCompile Include="..\..\src\helpers\barray.c" />
+    <ClCompile Include="..\..\src\helpers\clickrem.c" />
+    <ClCompile Include="..\..\src\helpers\resampler.c" />
+    <ClCompile Include="..\..\src\helpers\lpc.c" />
+    <ClCompile Include="..\..\src\helpers\memfile.c" />
+    <ClCompile Include="..\..\src\helpers\resample.c" />
+    <ClCompile Include="..\..\src\helpers\riff.c" />
+    <ClCompile Include="..\..\src\helpers\sampbuf.c" />
+    <ClCompile Include="..\..\src\helpers\silence.c" />
+    <ClCompile Include="..\..\src\helpers\stdfile.c" />
+    <ClCompile Include="..\..\src\it\itmisc.c" />
+    <ClCompile Include="..\..\src\it\itorder.c" />
+    <ClCompile Include="..\..\src\it\itrender.c" />
+    <ClCompile Include="..\..\src\it\itunload.c" />
+    <ClCompile Include="..\..\src\it\loadany.c" />
+    <ClCompile Include="..\..\src\it\loadany2.c" />
+    <ClCompile Include="..\..\src\it\loadokt.c" />
+    <ClCompile Include="..\..\src\it\loadokt2.c" />
+    <ClCompile Include="..\..\src\it\ptmeffect.c" />
+    <ClCompile Include="..\..\src\it\readany.c" />
+    <ClCompile Include="..\..\src\it\readany2.c" />
+    <ClCompile Include="..\..\src\it\readokt.c" />
+    <ClCompile Include="..\..\src\it\readokt2.c" />
+    <ClCompile Include="..\..\src\it\xmeffect.c" />
+    <ClCompile Include="..\..\src\it\itload.c" />
+    <ClCompile Include="..\..\src\it\itload2.c" />
+    <ClCompile Include="..\..\src\it\load669.c" />
+    <ClCompile Include="..\..\src\it\load6692.c" />
+    <ClCompile Include="..\..\src\it\loadamf.c" />
+    <ClCompile Include="..\..\src\it\loadamf2.c" />
+    <ClCompile Include="..\..\src\it\loadasy.c" />
+    <ClCompile Include="..\..\src\it\loadasy2.c" />
+    <ClCompile Include="..\..\src\it\loadmod.c" />
+    <ClCompile Include="..\..\src\it\loadmod2.c" />
+    <ClCompile Include="..\..\src\it\loadmtm.c" />
+    <ClCompile Include="..\..\src\it\loadmtm2.c" />
+    <ClCompile Include="..\..\src\it\loadoldpsm.c" />
+    <ClCompile Include="..\..\src\it\loadoldpsm2.c" />
+    <ClCompile Include="..\..\src\it\loadpsm.c" />
+    <ClCompile Include="..\..\src\it\loadpsm2.c" />
+    <ClCompile Include="..\..\src\it\loadptm.c" />
+    <ClCompile Include="..\..\src\it\loadptm2.c" />
+    <ClCompile Include="..\..\src\it\loadriff.c" />
+    <ClCompile Include="..\..\src\it\loadriff2.c" />
+    <ClCompile Include="..\..\src\it\loads3m.c" />
+    <ClCompile Include="..\..\src\it\loads3m2.c" />
+    <ClCompile Include="..\..\src\it\loadstm.c" />
+    <ClCompile Include="..\..\src\it\loadstm2.c" />
+    <ClCompile Include="..\..\src\it\loadxm.c" />
+    <ClCompile Include="..\..\src\it\loadxm2.c" />
+    <ClCompile Include="..\..\src\it\itread.c" />
+    <ClCompile Include="..\..\src\it\itread2.c" />
+    <ClCompile Include="..\..\src\it\read669.c" />
+    <ClCompile Include="..\..\src\it\read6692.c" />
+    <ClCompile Include="..\..\src\it\readam.c" />
+    <ClCompile Include="..\..\src\it\readamf.c" />
+    <ClCompile Include="..\..\src\it\readamf2.c" />
+    <ClCompile Include="..\..\src\it\readasy.c" />
+    <ClCompile Include="..\..\src\it\readdsmf.c" />
+    <ClCompile Include="..\..\src\it\readmod.c" />
+    <ClCompile Include="..\..\src\it\readmod2.c" />
+    <ClCompile Include="..\..\src\it\readmtm.c" />
+    <ClCompile Include="..\..\src\it\readoldpsm.c" />
+    <ClCompile Include="..\..\src\it\readpsm.c" />
+    <ClCompile Include="..\..\src\it\readptm.c" />
+    <ClCompile Include="..\..\src\it\readriff.c" />
+    <ClCompile Include="..\..\src\it\reads3m.c" />
+    <ClCompile Include="..\..\src\it\reads3m2.c" />
+    <ClCompile Include="..\..\src\it\readstm.c" />
+    <ClCompile Include="..\..\src\it\readstm2.c" />
+    <ClCompile Include="..\..\src\it\readxm.c" />
+    <ClCompile Include="..\..\src\it\readxm2.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\src\helpers\resamp2.inc">
+      <FileType>Document</FileType>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+    </CustomBuild>
+    <CustomBuild Include="..\..\src\helpers\resamp3.inc">
+      <FileType>Document</FileType>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+    </CustomBuild>
+    <CustomBuild Include="..\..\src\helpers\resample.inc">
+      <FileType>Document</FileType>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+    </CustomBuild>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\include\dumb.h" />
+    <ClInclude Include="..\..\include\internal\barray.h" />
+    <ClInclude Include="..\..\include\internal\dumb.h" />
+    <ClInclude Include="..\..\include\internal\dumbfile.h" />
+    <ClInclude Include="..\..\include\internal\fir_resampler.h" />
+    <ClInclude Include="..\..\include\internal\it.h" />
+    <ClInclude Include="..\..\include\internal\resampler.h" />
+    <ClInclude Include="..\..\include\internal\lpc.h" />
+    <ClInclude Include="..\..\include\internal\riff.h" />
+    <ClInclude Include="..\..\include\internal\stack_alloc.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/libraries/dumb/vc6/dumb/dumb.vcxproj.filters b/libraries/dumb/vc6/dumb/dumb.vcxproj.filters
new file mode 100644
index 000000000..bd096043f
--- /dev/null
+++ b/libraries/dumb/vc6/dumb/dumb.vcxproj.filters
@@ -0,0 +1,326 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="include">
+      <UniqueIdentifier>{419c5e1f-2bf4-473a-b2e5-2e531285aa62}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="include\internal">
+      <UniqueIdentifier>{44b333b3-1607-4820-82bc-e4c21a40e31a}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="src">
+      <UniqueIdentifier>{0b122556-3781-4ef3-87fe-ffa5fb50b493}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="src\core">
+      <UniqueIdentifier>{e961cd19-26f6-4df0-b895-e099d3e81db9}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="src\helpers">
+      <UniqueIdentifier>{82e35139-08ff-4e99-a3ce-2254d7427ec4}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="src\it">
+      <UniqueIdentifier>{5f7fc0f6-4008-4166-83ad-e5d914718bd0}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="src\it\loaders">
+      <UniqueIdentifier>{0fd0715e-5824-4419-aa5b-2d4272d222ce}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="src\it\readers">
+      <UniqueIdentifier>{b9e26fe7-6056-4580-b2c6-10e6116d4129}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\core\atexit.c">
+      <Filter>src\core</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\duhlen.c">
+      <Filter>src\core</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\duhtag.c">
+      <Filter>src\core</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\dumbfile.c">
+      <Filter>src\core</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\loadduh.c">
+      <Filter>src\core</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\makeduh.c">
+      <Filter>src\core</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\rawsig.c">
+      <Filter>src\core</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\readduh.c">
+      <Filter>src\core</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\register.c">
+      <Filter>src\core</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\rendduh.c">
+      <Filter>src\core</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\rendsig.c">
+      <Filter>src\core</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\core\unload.c">
+      <Filter>src\core</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\helpers\barray.c">
+      <Filter>src\helpers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\helpers\clickrem.c">
+      <Filter>src\helpers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\helpers\lpc.c">
+      <Filter>src\helpers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\helpers\memfile.c">
+      <Filter>src\helpers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\helpers\resample.c">
+      <Filter>src\helpers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\helpers\riff.c">
+      <Filter>src\helpers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\helpers\sampbuf.c">
+      <Filter>src\helpers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\helpers\silence.c">
+      <Filter>src\helpers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\helpers\stdfile.c">
+      <Filter>src\helpers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\itload.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\itload2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\itmisc.c">
+      <Filter>src\it</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\itorder.c">
+      <Filter>src\it</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\itread.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\itread2.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\itrender.c">
+      <Filter>src\it</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\itunload.c">
+      <Filter>src\it</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\load669.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\load6692.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadamf.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadamf2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadasy.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadasy2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadmod.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadmod2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadmtm.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadmtm2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadoldpsm.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadoldpsm2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadpsm.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadpsm2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadptm.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadptm2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadriff.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadriff2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loads3m.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loads3m2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadstm.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadstm2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadxm.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadxm2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\ptmeffect.c">
+      <Filter>src\it</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\read669.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\read6692.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readam.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readamf.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readamf2.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readasy.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readdsmf.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readmod.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readmod2.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readmtm.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readoldpsm.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readpsm.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readptm.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readriff.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\reads3m.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\reads3m2.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readstm.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readstm2.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readxm.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readxm2.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\xmeffect.c">
+      <Filter>src\it</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readokt.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readokt2.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadokt.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadokt2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadany.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\loadany2.c">
+      <Filter>src\it\loaders</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readany.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\it\readany2.c">
+      <Filter>src\it\readers</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\helpers\resampler.c">
+      <Filter>src\helpers</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\include\dumb.h">
+      <Filter>include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\include\internal\barray.h">
+      <Filter>include\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\include\internal\dumb.h">
+      <Filter>include\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\include\internal\dumbfile.h">
+      <Filter>include\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\include\internal\fir_resampler.h">
+      <Filter>include\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\include\internal\it.h">
+      <Filter>include\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\include\internal\lpc.h">
+      <Filter>include\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\include\internal\riff.h">
+      <Filter>include\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\include\internal\stack_alloc.h">
+      <Filter>include\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\include\internal\resampler.h">
+      <Filter>include\internal</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\src\helpers\resamp3.inc">
+      <Filter>src\helpers</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\..\src\helpers\resamp2.inc">
+      <Filter>src\helpers</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\..\src\helpers\resample.inc">
+      <Filter>src\helpers</Filter>
+    </CustomBuild>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/libraries/game-music-emu/CMakeLists.txt b/libraries/game-music-emu/CMakeLists.txt
new file mode 100644
index 000000000..8569b1b7a
--- /dev/null
+++ b/libraries/game-music-emu/CMakeLists.txt
@@ -0,0 +1,146 @@
+# CMake project definition file for the bundled Game_Music_Emu library (libgme).
+project(libgme)
+
+include (CheckCXXCompilerFlag)
+
+# When version is changed, also change the one in gme/gme.h to match
+set(GME_VERSION 0.6.2 CACHE INTERNAL "libgme Version")
+
+# 2.6+ always assumes FATAL_ERROR, but 2.4 and below don't.
+# Of course, 2.4 might work, in which case you're welcome to drop
+# down the requirement, but I can't test that.
+cmake_minimum_required(VERSION 2.6 FATAL_ERROR)
+
+# I don't plan on debugging this, so any build type not matching "Release" is forced to RelWithDebInfo.
+if( NOT CMAKE_BUILD_TYPE MATCHES "Release" )
+    set( CMAKE_BUILD_TYPE "RelWithDebInfo" )
+endif()
+
+if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
+    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra" )
+    if( NOT PROFILE )
+        set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer" )
+    endif()
+    check_cxx_compiler_flag( -Wno-array-bounds HAVE_NO_ARRAY_BOUNDS )
+    if( HAVE_NO_ARRAY_BOUNDS )
+        set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-array-bounds" )
+    endif()
+endif()
+
+# [ZDoom] Disable most of the bogus and annoying MSVC warnings.
+if( MSVC )
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4101 /wd4800 /wd4702 /wd4706 /wd4805 /wd4310 /wd4244 /wd4456 /wd4459 /wd4146 /wd4127 /wd4458 /wd4267 /wd4804")
+endif()
+
+# Enable the fast-math flag for GME (ZD_FASTMATH_FLAG is not set in this file; presumably the parent project provides it).
+set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ZD_FASTMATH_FLAG}" )
+
+# Default emulators to build (all of them! ;)
+# [ZDoom] No options: the NOT DEFINED guards below are commented out, so all of them are enabled by default.
+
+#if (NOT DEFINED USE_GME_AY)
+    SET(USE_GME_AY 1 CACHE BOOL "Enable support for Spectrum ZX music emulation")
+#endif()
+
+#if (NOT DEFINED USE_GME_GBS)
+    SET(USE_GME_GBS 1 CACHE BOOL "Enable support for Game Boy music emulation")
+#endif()
+
+#if (NOT DEFINED USE_GME_GYM)
+    SET(USE_GME_GYM 1 CACHE BOOL "Enable Sega MegaDrive/Genesis music emulation")
+#endif()
+
+#if (NOT DEFINED USE_GME_HES)
+    SET(USE_GME_HES 1 CACHE BOOL "Enable PC Engine/TurboGrafx-16 music emulation")
+#endif()
+
+#if (NOT DEFINED USE_GME_KSS)
+    SET(USE_GME_KSS 1 CACHE BOOL "Enable MSX or other Z80 systems music emulation")
+#endif()
+
+#if (NOT DEFINED USE_GME_NSF)
+    SET(USE_GME_NSF 1 CACHE BOOL "Enable NES NSF music emulation")
+#endif()
+
+#if (NOT DEFINED USE_GME_NSFE)
+    SET(USE_GME_NSFE 1 CACHE BOOL "Enable NES NSFE and NSF music emulation")
+#endif()
+
+#if (NOT DEFINED USE_GME_SAP)
+    SET(USE_GME_SAP 1 CACHE BOOL "Enable Atari SAP music emulation")
+#endif()
+
+#if (NOT DEFINED USE_GME_SPC)
+    SET(USE_GME_SPC 1 CACHE BOOL "Enable SNES SPC music emulation")
+#endif()
+
+#if (NOT DEFINED USE_GME_VGM)
+    SET(USE_GME_VGM 1 CACHE BOOL "Enable Sega VGM/VGZ music emulation")
+#endif()
+
+#if (NOT DEFINED GME_YM2612_EMU)
+    SET(GME_YM2612_EMU "Nuked" CACHE STRING "Which YM2612 emulator to use: \"Nuked\" (LGPLv2.1+), \"MAME\" (GPLv2+), or \"GENS\" (LGPLv2.1+)")
+#endif()
+
+#if (USE_GME_NSFE AND NOT USE_GME_NSF)
+    #MESSAGE(" -- NSFE support requires NSF, enabling NSF support. --")
+    SET(USE_GME_NSF 1 CACHE BOOL "Enable NES NSF music emulation" FORCE)
+#endif()
+
+# [ZDoom] Always set these to OFF (no shared library build, no UBSan).
+set(BUILD_SHARED_LIBS OFF)
+set(ENABLE_UBSAN  OFF)
+
+# Check for GCC/Clang "visibility" support.
+if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
+    OR
+    CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -W -Wextra")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+
+    # Assume we have visibility support on any compiler that supports C++11
+    add_definitions (-DLIBGME_VISIBILITY)
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden")
+
+    # Try to protect against undefined behavior from signed integer overflow
+    # This has caused miscompilation of code already and there are other
+    # potential uses; see https://bitbucket.org/mpyne/game-music-emu/issues/18/
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fwrapv")
+
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+        if (NOT DEFINED LIBGME_SWITCH_FALLTHROUGH)
+            check_cxx_compiler_flag (-Wimplicit-fallthrough __LIBGME_SWITCH_FALLTHROUGH_WARNINGS)
+            set (LIBGME_SWITCH_FALLTHROUGH ${__LIBGME_SWITCH_FALLTHROUGH_WARNINGS}
+                CACHE BOOL "Set if the compiler will complain about implicit switch fallthrough"
+                )
+        endif()
+    elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+        set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-inconsistent-missing-override -Wno-unused-const-variable")
+    endif()
+
+    if (ENABLE_UBSAN)
+        # GCC needs -static-libubsan
+        if (NOT BUILD_SHARED_LIBS AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+            set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -static-libubsan")
+        else()
+            set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
+        endif()
+    endif()
+endif ()
+
+if(LIBGME_SWITCH_FALLTHROUGH)
+     # Avoid warning spam about switch fallthroughs, which are numerous in
+     # the codebase.
+     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wimplicit-fallthrough=0")
+endif()
+
+# Library target defined here (not shared: BUILD_SHARED_LIBS is forced OFF above)
+add_subdirectory(gme)
+
+# EXCLUDE_FROM_ALL adds build rules but keeps it out of default build
+# [ZDoom] Not needed, hence the if( FALSE ) guard.
+if( FALSE )
+add_subdirectory(player EXCLUDE_FROM_ALL)
+add_subdirectory(demo EXCLUDE_FROM_ALL)
+endif()
diff --git a/libraries/game-music-emu/changes.txt b/libraries/game-music-emu/changes.txt
new file mode 100644
index 000000000..034ba4821
--- /dev/null
+++ b/libraries/game-music-emu/changes.txt
@@ -0,0 +1,5 @@
+Game_Music_Emu Change Log
+-------------------------
+
+Please see the git version history (e.g. git shortlog tags/0.6.0..tags/0.6.1)
+for the accurate change log.
diff --git a/libraries/game-music-emu/design.txt b/libraries/game-music-emu/design.txt
new file mode 100644
index 000000000..d79c860f7
--- /dev/null
+++ b/libraries/game-music-emu/design.txt
@@ -0,0 +1,194 @@
+Game_Music_Emu 0.6.0 Design
+---------------------------
+This might be slightly out-of-date at times, but will be a big help in
+understanding the library implementation.
+
+
+Architecture
+------------
+The library is essentially a bunch of independent game music file
+emulators unified with a common interface.
+
+Gme_File and Music_Emu provide a common interface to the emulators. The
+virtual functions are protected rather than public to allow pre- and
+post-processing of arguments and data in one place. This allows the
+emulator classes to assume that everything is set up properly when
+starting a track and playing samples.
+
+All file input is done with the Data_Reader interface. Many derived
+classes are present, for the usual disk-based file and block of memory,
+to specialized adaptors for things like reading a subset of data or
+combining a block of memory with a Data_Reader to the remaining data.
+This makes the library much more flexible with regard to the source of
+game music file data. I still added a specialized load_mem() function to
+have the emulator keep a pointer to data already read in memory, for
+those formats whose files can be absolutely huge (GYM, some VGMs). This
+is important if for some reason the caller must load the data ahead of
+time, but doesn't want the emulator needlessly making a copy.
+
+Since silence checking and fading are relatively complex, they are kept
+separate from basic file loading and track information, which are
+handled in the base class Gme_File. My original intent was to use
+Gme_File as the common base class for full emulators and track
+information-only readers, but implementing the C interface was much
+simpler if both derived from Music_Emu. User C++ code can still benefit
+from static checking by using Gme_File where only track information will
+be accessed.
+
+Each emulator generally has three components: main emulator, CPU
+emulator, and sound chip emulator(s). Each component has minimal
+coupling, so use in a full emulator or stand alone is fairly easy. This
+modularity really helps reduce complexity. Blip_Buffer helps a lot with
+simplifying the APU interfaces and implementation.
+
+The "classic" emulators derive from Classic_Emu, which handles
+Blip_Buffer filling and multiple channels. It uses Multi_Buffer for
+output, allowing you to derive a custom buffer that could output each
+voice to a separate sound channel and do different processing on each.
+At some point I'm going to implement a better Effects_Buffer that allows
+individual control of every channel.
+
+In implementing the C interface, I wanted a way to specify an emulator
+type that didn't require linking in all the emulators. For each emulator
+type there is a global object with pointers to functions to create the
+emulator or a track information reader. The emulator type is thus a
+pointer to this, which conveniently allows for a NULL value. The user
+referencing this emulator type object is what ultimately links the
+emulator in (unless new Foo_Emu is used in C++, of course). This type
+also serves as a useful substitute for RTTI on older C++ compilers.
+
+Addendum: I have since added gme_type_list(), which causes all listed
+emulators to be linked in. To avoid this, I make the list itself
+editable in blargg_config.h. Having a built-in list allows
+gme_load_file() to take a path and give back an emulator with the file
+loaded, which is extremely useful for new users.
+
+
+Interface conventions
+----------------------
+If a function retains a pointer to or replaces the value of an object
+passed, it takes a pointer so that it will be clear in the caller's
+source code that care is required.
+
+Multi-word names have an underscore '_' separator between individual
+words.
+
+Functions are named with lowercase words. Functions which perform an
+action with side-effects are named with a verb phrase (e.g. load, move,
+run). Functions which return the value of a piece of state are named
+using a noun phrase (e.g. loaded, moved, running).
+
+Classes are named with capitalized words. Only the first letter of an
+acronym is capitalized. Class names are nouns, sometimes suggestive of
+what they do (e.g. File_Scanner).
+
+Structure, enumeration, and typedefs to these and built-in types are
+named using lowercase words with a _t suffix.
+
+Macros are named with all-uppercase words.
+
+Internal names which can't be hidden due to technical reasons have an
+underscore '_' suffix.
+
+
+Managing Complexity
+-------------------
+Complexity has been a factor in most library decisions. Many features
+have been passed by due to the complexity they would add. Once
+complexity goes past a certain level, mentally grasping the library in
+its entirety becomes impractical, at which point more defects will occur
+and be hard to find.
+
+I chose 16-bit signed samples because it seems to be the most common
+format. Supporting multiple formats would add too much complexity to be
+worth it. Other formats can be obtained via conversion.
+
+I've kept interfaces fairly lean, leaving many possible features
+untapped but easy to add if necessary. For example the classic emulators
+could have volume and frequency equalization adjusted separately for
+each channel, since they each have an associated Blip_Synth.
+
+Source files of 400 lines or less seem to be the best size to limit
+complexity. In a few cases there is no reasonable way to split longer
+files, or there is benefit from having the source together in one file.
+
+
+Preventing Bugs
+---------------
+I've done many things to reduce the opportunity for defects. A general
+principle is to write code so that defects will be as visible as
+possible. I've used several techniques to achieve this.
+
+I put assertions at key points where defects seem likely or where
+corruption due to a defect is likely to be visible. I've also put
+assertions where violations of the interface are likely. In emulators
+where I am unsure of exact hardware operation in a particular case, I
+output a debug-only message noting that this has occurred; many times I
+haven't implemented a hardware feature because nothing uses it. I've
+made code brittle where there is no clear reason for flexibility; code
+written to handle every possibility sacrifices quality and reliability
+to handle vaguely defined situations.
+
+
+Flexibility through indirection
+-------------------------------
+I've tried to allow the most flexibility of modules by using indirection
+to allow extension by the user. This keeps each module simpler and more
+focused on its unique task.
+
+The classic emulators use Multi_Buffer, which potentially allows a
+separate Blip_Buffer for each channel. This keeps emulators free of
+typical code to allow output in mono, stereo, panning, etc.
+
+All emulators use a reader object to access file data, allowing it to be
+stored in a regular file, compressed archive, memory, or generated
+on-the-fly. Again, the library can be kept free of the particulars of
+file access and changes required to support new formats.
+
+
+Emulators in general
+--------------------
+When I wrote the first NES sound emulator, I stored most of the state in
+an emulator-specific format, with significant redundancy. In the
+register write function I decoded everything into named variables. I
+became tired of the verbosity and wanted to more closely model the
+hardware, so I moved to a style of storing the last written value to
+each register, along with as little other state as possible, mostly the
+internal hardware registers. While this involves slightly more
+recalculation, in most cases the emulation code is of comparable size.
+It also makes state save/restore (for use in a full emulator) much
+simpler. Finally, it makes debugging easier since the hardware registers
+used in emulation are obvious.
+
+
+CPU Cores
+---------
+I've spent lots of time coming up with techniques to optimize the CPU
+cores. Some of the most important: execute multiple instructions during
+an emulation call, keep state in local variables to allow register
+assignment, optimize state representation for most common instructions,
+defer status flag calculation until actually needed, read program code
+directly without a call to the memory read function, always pre-fetch
+the operand byte before decoding instruction, and emulate instructions
+using common blocks of code.
+
+I've successfully used Nes_Cpu in a fairly complete NES emulator, and
+I'd like to make all the CPU emulators suitable for use in emulators. It
+seems a waste for them to be used only for the small amount of emulation
+necessary for game music files.
+
+I debugged the CPU cores by writing a test shell that ran them in
+parallel with other CPU cores and compared all memory accesses and
+processor states at each step. This provided good value at little cost.
+
+The CPU mapping page size is adjustable to allow the best tradeoff
+between memory/cache usage and handler granularity. The interface allows
+code to be somewhat independent of the page size.
+
+I optimize program memory accesses to direct reads rather than calls to
+the memory read function. My assumption is that it would be difficult to
+get useful code out of hardware I/O addresses, so no software will
+intentionally execute out of I/O space. Since the page size can be
+changed easily, most program memory mapping schemes can be accommodated.
+This greatly reduces memory access function calls.
+
diff --git a/libraries/game-music-emu/gme.txt b/libraries/game-music-emu/gme.txt
new file mode 100644
index 000000000..5a7d2f560
--- /dev/null
+++ b/libraries/game-music-emu/gme.txt
@@ -0,0 +1,376 @@
+Game_Music_Emu 0.6.2
+--------------------
+Author     : Shay Green <gblargg@gmail.com>
+Maintainer : Michael Pyne <mpyne@purinchu.net>
+Website    : https://bitbucket.org/mpyne/game-music-emu/
+Source     : https://bitbucket.org/mpyne/game-music-emu/
+License    : GNU Lesser General Public License (LGPL), see LICENSE.txt
+
+Contents
+--------
+* Overview
+* Error handling
+* Emulator types
+* M3U playlist support
+* Information fields
+* Track length
+* Loading file data
+* Sound parameters
+* VGM/GYM YM2413 & YM2612 FM sound
+* Modular construction
+* Obscure features
+* Solving problems
+* Thanks
+
+
+Overview
+--------
+This library can open game music files, play tracks, and read game and
+track information tags. To play a game music file, do the following:
+
+* Open the file with gme_open_file()
+* Start a track with gme_start_track()
+* Generate samples as needed with gme_play()
+* Play samples through speaker using your operating system
+* Delete emulator when done with gme_delete()
+
+Your code must arrange for the generated samples to be played through
+the computer's speaker using whatever method your operating system
+requires.
+
+There are many additional features available; you can:
+
+* Determine the type of a music file without opening it with
+gme_identify_*()
+* Load just the file's information tags with gme_info_only
+* Load from a block of memory rather than a file with gme_load_data()
+* Arrange for a fade-out at a particular time with gme_set_fade()
+* Find when a track has ended with gme_track_ended()
+* Seek to a new time in the track with gme_seek()
+* Load an extended m3u playlist with gme_load_m3u()
+* Get a list of the voices (channels) and mute them individually with
+gme_voice_names() and gme_mute_voice()
+* Change the playback tempo without affecting pitch with gme_set_tempo()
+* Adjust treble/bass equalization with gme_set_equalizer()
+* Associate your own data with an emulator and later get it back with
+gme_set_user_data()
+* Register a function of yours to be called back when the emulator is
+deleted with gme_set_user_cleanup()
+
+Refer to gme.h for a comprehensive summary of features.
+
+
+Error handling
+--------------
+Functions which can fail have a return type of gme_err_t, which is a
+pointer to an error string (const char*). If a function is successful it
+returns NULL. Errors that you can easily avoid are checked with debug
+assertions; gme_err_t return values are only used for genuine run-time
+errors that can't be easily predicted in advance (out of memory, I/O
+errors, incompatible file data). Your code should check all error
+values.
+
+When loading a music file in the wrong emulator or trying to load a
+non-music file, gme_wrong_file_type is returned. You can check for this
+error in C++ like this:
+
+	gme_err_t err = gme_open_file( path, &emu );
+	if ( err == gme_wrong_file_type )
+		...
+
+To check for minor problems, call gme_warning() to get a string
+describing the last warning. Your player should allow the user some way
+of knowing when this is the case, since these minor errors could affect
+playback. Without this information the user can't solve problems as
+well. When playing a track, gme_warning() returns minor playback-related
+problems (major playback problems end the track immediately and set the
+warning string).
+
+
+Emulator types
+--------------
+The library includes several game music emulators that each support a
+different file type. Each is identified by a gme_type_t constant defined
+in gme.h, for example gme_nsf_emu is for the NSF emulator. If you use
+gme_open_file() or gme_open_data(), the library does the work of
+determining the file type and creating an appropriate emulator. If you
+want more control over this process, read on.
+
+There are two basic ways to identify a game music file's type: look at
+its file extension, or read the header data. The library includes
+functions to help with both methods. The first is preferable because it
+is fast and the most common way to identify files. Sometimes the
+extension is lost or wrong, so the header must be read.
+
+Use gme_identify_extension() to find the correct game music type based
+on a filename. To identify a file based on its extension and header
+contents, use gme_identify_file(). If you read the header data yourself,
+use gme_identify_header().
+
+If you want to remove support for some music types to reduce your
+executable size, edit GME_TYPE_LIST in blargg_config.h. For example, to
+support just NSF and GBS, use this:
+
+	#define GME_TYPE_LIST \
+		gme_nsf_type,\
+		gme_gbs_type
+
+
+M3U playlist support
+--------------------
+The library supports playlists in an extended m3u format with
+gme_load_m3u() to give track names and times to multi-song formats: AY,
+GBS, HES, KSS, NSF, NSFE, and SAP. Some aspects of the file format
+itself are not well-defined so some m3u files won't work properly
+(particularly those provided with KSS files). Only m3u files referencing
+a single file are supported; your code must handle m3u files covering
+more than one game music file, though it can use the built-in m3u
+parsing provided by the library.
+
+
+Information fields
+------------------
+Support is provided for the various text fields and length information
+in a file with gme_track_info(). If you just need track information for
+a file (for example, building a playlist), use gme_new_info() in place
+of gme_new_emu(), load the file normally, then you can access the track
+count and info, but nothing else.
+
+             M3U  VGM  GYM  SPC  SAP  NSFE  NSF  AY  GBS  HES  KSS
+             -------------------------------------------------------
+Track Count | *    *    *    *    *    *    *    *    *
+            |
+System      |      *    *    *    *    *    *    *    *    *    *
+            |
+Game        |      *    *    *         *    *         *    *
+            |
+Song        | *    *    *    *    *    *         *
+            |
+Author      |      *         *    *    *    *    *    *    *
+            |
+Copyright   |      *    *    *    *    *    *         *    *
+            |
+Comment     |      *    *    *                   *
+            |
+Dumper      |      *    *    *         *
+            |
+Length      | *    *    *    *    *    *
+            |
+Intro Length| *    *    *
+            |
+Loop Length | *    *    *
+
+As listed above, the HES and KSS file formats don't include a track
+count, and tracks are often scattered over the 0-255 range, so an m3u
+playlist for these is a must.
+
+Unavailable text fields are set to an empty string and times to -1. Your
+code should be prepared for any combination of available and unavailable
+fields, as a particular music file might not use all of the supported
+fields listed above.
+
+Currently text fields are truncated to 255 characters. Obscure fields of
+some formats are not currently decoded; contact me if you want one
+added.
+
+
+Track length
+------------
+The library leaves it up to you as to when to stop playing a track. You
+can ask for available length information and then tell the library what
+time it should start fading the track with gme_set_fade(). By default it
+also continually checks for 6 or more seconds of silence to mark the end
+of a track. Here is a reasonable algorithm you can use to decide how
+long to play a track:
+
+* If the track length is > 0, use it
+* If the loop length > 0, play for intro + loop * 2
+* Otherwise, default to 2.5 minutes (150000 msec)
+
+If you want to play a track longer than normal, be sure the loop length
+isn't zero. See Music_Player.cpp around line 145 for example code.
+
+By default, the library skips silence at the beginning of a track. It
+also continually checks for the end of a non-looping track by watching
+for 6 seconds of unbroken silence. When doing this it scans *ahead* by
+several seconds so it can report the end of the track after only one
+second of silence has actually played. This feature can be disabled with
+gme_ignore_silence().
+
+
+Loading file data
+-----------------
+The library allows file data to be loaded in many different ways. All
+load functions return an error which you should check. The following
+examples assume these variables:
+
+	Music_Emu* emu;
+	gme_err_t error;
+
+If you're letting the library determine a file's type, you can use
+either gme_open_file() or gme_open_data():
+	
+	error = gme_open_file( pathname, &emu );
+	error = gme_open_data( pointer, size, &emu );
+
+If you're manually determining file type and using gme_new_emu() to
+create an emulator, you can use the following methods of loading:
+
+* From a block of memory:
+
+	error = gme_load_data( emu, pointer, size );
+
+* Have library call your function to read data:
+
+	gme_err_t my_read( void* my_data, void* out, long count )
+	{
+		// code that reads 'count' bytes into 'out' buffer
+		// and return 0 if no error
+	}
+	
+	error = gme_load_custom( emu, my_read, file_size, my_data );
+
+
+Sound parameters
+----------------
+All emulators support an arbitrary output sampling rate. A rate of 44100
+Hz should work well on most systems. Since band-limited synthesis is
+used, a sampling rate above 48000 Hz is not necessary and will actually
+reduce sound quality and performance.
+
+All emulators also support adjustable gain, mainly for the purpose of
+getting consistent volume between different music formats and avoiding
+excessive modulation. The gain can only be set *before* setting the
+emulator's sampling rate, so it's not useful as a general volume
+control. The default gains of emulators are set so that they give
+generally similar volumes, though some soundtracks are significantly
+louder or quieter than normal.
+
+Some emulators support adjustable treble and bass frequency equalization
+(AY, GBS, HES, KSS, NSF, NSFE, SAP, VGM) using set_equalizer().
+Parameters are specified using gme_equalizer_t eq = { treble_dB,
+bass_freq }. Treble_dB sets the treble level (in dB), where 0.0 dB gives
+normal treble; -200.0 dB is quite muffled, and 5.0 dB emphasizes treble
+for an extra crisp sound. Bass_freq sets the frequency where bass
+response starts to diminish; 15 Hz is normal, 0 Hz gives maximum bass,
+and 15000 Hz removes all bass. For example, the following makes the
+sound extra-crisp but lacking bass:
+
+	gme_equalizer_t eq = { 5.0, 1000 };
+	gme_set_equalizer( music_emu, &eq );
+
+Each emulator's equalization defaults to approximate the particular
+console's sound quality; this default can be determined by calling
+equalizer() just after creating the emulator. The Music_Emu::tv_eq
+profile gives sound as if coming from a TV speaker, and some emulators
+include other profiles for different versions of the system. For
+example, to use Famicom sound equalization with the NSF emulator, do the
+following:
+
+	music_emu->set_equalizer( Nsf_Emu::famicom_eq );
+
+
+VGM/GYM YM2413 & YM2612 FM sound
+--------------------------------
+The library plays Sega Genesis/Mega Drive music using a YM2612 FM sound
+chip emulator based on the Gens project. Because this has some
+inaccuracies, other YM2612 emulators can be used in its place by
+re-implementing the interface in YM2612_Emu.h. Available on my website
+is a modified version of MAME's YM2612 emulator, which sounds better in
+some ways and whose author is still making improvements.
+
+VGM music files using the YM2413 FM sound chip are also supported, but a
+YM2413 emulator isn't included with the library due to technical
+reasons. I have put one of the available YM2413 emulators on my website
+that can be used directly.
+
+
+Modular construction
+--------------------
+The library is made of many fairly independent modules. If you're using
+only one music file emulator, you can eliminate many of the library
+sources from your program. Refer to the files list in readme.txt to get
+a general idea of what can be removed, and be sure to edit GME_TYPE_LIST
+(see "Emulator types" above). Post to the forum if you'd like me to put
+together a smaller version for a particular use, as this only takes me a
+few minutes to do.
+
+If you want to use one of the individual sound chip emulators (or CPU
+cores) in your own console emulator, first check the libraries page on
+my website since I have released several of them as stand alone
+libraries with included documentation and examples on their use. If you
+don't find it as a standalone library, contact me and I'll consider
+separating it.
+
+The "classic" sound chips use my Blip_Buffer library, which greatly
+simplifies their implementation and efficiently handles band-limited
+synthesis. It is also available as a stand alone library with
+documentation and many examples.
+
+
+Obscure features
+----------------
+The library's flexibility allows many possibilities. Contact me if you
+want help implementing ideas or removing limitations.
+
+* Uses no global/static variables, allowing multiple instances of any
+emulator. This is useful in a music player if you want to allow
+simultaneous recording or scanning of other tracks while one is already
+playing. This will also be useful if your platform disallows global
+data.
+
+* Emulators that support a custom sound buffer can have *every* voice
+routed to a different Blip_Buffer, allowing custom processing on each
+voice. For example you could record a Game Boy track as a 4-channel
+sound file.
+
+* Defining BLIP_BUFFER_FAST uses lower quality, less-multiply-intensive
+synthesis on "classic" emulators, which might help on some really old
+processors. This significantly lowers sound quality and prevents treble
+equalization. Try this if your platform's processor isn't fast enough
+for normal quality. Even on my ten-year-old 400 MHz Mac, this reduces
+processor usage at most by about 0.6% (from 4% to 3.4%), hardly worth
+the quality loss.
+
+
+Solving problems
+----------------
+If you're having problems, try the following:
+
+* If you're getting garbled sound, try this simple siren generator in
+place of your call to play(). This will quickly tell whether the problem
+is in the library or in your code.
+
+	static void play_siren( long count, short* out )
+	{
+		static double a, a2;
+		while ( count-- )
+			*out++ = 0x2000 * sin( a += .1 + .05*sin( a2+=.00005 ) );
+	}
+
+* Enable debugging support in your environment. This enables assertions
+and other run-time checks.
+
+* Turn the compiler's optimizer off. Sometimes an optimizer generates
+bad code.
+
+* If multiple threads are being used, ensure that only one at a time is
+accessing a given set of objects from the library. This library is not
+in general thread-safe, though independent objects can be used in
+separate threads.
+
+* If all else fails, see if the demos work.
+
+
+Thanks
+------
+Big thanks to Chris Moeller (kode54) for help with library testing and
+feedback, for maintaining the Foobar2000 plugin foo_gep based on it, and
+for original work on openspc++ that was used when developing Spc_Emu.
+Brad Martin's excellent OpenSPC SNES DSP emulator worked well from the
+start. Also thanks to Richard Bannister, Mahendra Tallur, Shazz,
+nenolod, theHobbit, Johan Samuelsson, and nes6502 for testing, using,
+and giving feedback for the library in their respective game music
+players. More recently, Lucas Paul and Michael Pyne have helped nudge the
+library into a public repository and get its interface more stable for use
+in shared libraries.
diff --git a/libraries/game-music-emu/gme/Ay_Apu.cpp b/libraries/game-music-emu/gme/Ay_Apu.cpp
new file mode 100644
index 000000000..d132c42f9
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ay_Apu.cpp
@@ -0,0 +1,395 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Ay_Apu.h"
+
+/* Copyright (C) 2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+// Emulation inaccuracies:
+// * Noise isn't run when not in use
+// * Changes to envelope and noise periods are delayed until next reload
+// * Super-sonic tone should attenuate output to about 60%, not 50%
+
+// Tones above this frequency are treated as disabled tone at half volume.
+// Power of two is more efficient (avoids division).
+unsigned const inaudible_freq = 16384;
+
+int const period_factor = 16; // scales register period values into blip_time_t units
+
+static byte const amp_table [16] = // output amplitude for each of the 16 volume levels
+{
+#define ENTRY( n ) byte (n * Ay_Apu::amp_range + 0.5) // scale fraction n to amp_range; +0.5 rounds to nearest
+	// With channels tied together and 1K resistor to ground (as datasheet recommends),
+	// output nearly matches logarithmic curve as claimed. Approx. 1.5 dB per step.
+	ENTRY(0.000000),ENTRY(0.007813),ENTRY(0.011049),ENTRY(0.015625),
+	ENTRY(0.022097),ENTRY(0.031250),ENTRY(0.044194),ENTRY(0.062500),
+	ENTRY(0.088388),ENTRY(0.125000),ENTRY(0.176777),ENTRY(0.250000),
+	ENTRY(0.353553),ENTRY(0.500000),ENTRY(0.707107),ENTRY(1.000000),
+	
+	/*
+	// Measured from an AY-3-8910A chip with date code 8611.
+	
+	// Direct voltages without any load (very linear)
+	ENTRY(0.000000),ENTRY(0.046237),ENTRY(0.064516),ENTRY(0.089785),
+	ENTRY(0.124731),ENTRY(0.173118),ENTRY(0.225806),ENTRY(0.329032),
+	ENTRY(0.360215),ENTRY(0.494624),ENTRY(0.594624),ENTRY(0.672043),
+	ENTRY(0.766129),ENTRY(0.841935),ENTRY(0.926882),ENTRY(1.000000),
+	// With only some load
+	ENTRY(0.000000),ENTRY(0.011940),ENTRY(0.017413),ENTRY(0.024876),
+	ENTRY(0.036318),ENTRY(0.054229),ENTRY(0.072637),ENTRY(0.122388),
+	ENTRY(0.174129),ENTRY(0.239303),ENTRY(0.323881),ENTRY(0.410945),
+	ENTRY(0.527363),ENTRY(0.651741),ENTRY(0.832338),ENTRY(1.000000),
+	*/
+#undef ENTRY
+};
+
+static byte const modes [8] = // packed envelope shapes; expanded into env.modes by Ay_Apu::Ay_Apu()
+{
+#define MODE( a0,a1, b0,b1, c0,c1 ) \
+		(a0 | a1<<1 | b0<<2 | b1<<3 | c0<<4 | c1<<5) // each pair: start,end level bit (1 = level 15) of one 16-step segment
+	MODE( 1,0, 1,0, 1,0 ),
+	MODE( 1,0, 0,0, 0,0 ),
+	MODE( 1,0, 0,1, 1,0 ),
+	MODE( 1,0, 1,1, 1,1 ),
+	MODE( 0,1, 0,1, 0,1 ),
+	MODE( 0,1, 1,1, 1,1 ),
+	MODE( 0,1, 1,0, 0,1 ),
+	MODE( 0,1, 0,0, 0,0 ),
+};
+
+Ay_Apu::Ay_Apu()
+{
+	// Build the full tables of the upper 8 envelope waveforms: each packed
+	// modes [] entry expands into three consecutive segments of 16 amplitudes.
+	for ( int mode = 0; mode < 8; mode++ )
+	{
+		byte* dest = env.modes [mode];
+		int bits = modes [mode];
+		for ( int segment = 0; segment < 3; segment++, bits >>= 2 )
+		{
+			int const first = bits & 1;        // starting level bit of this segment
+			int const last  = (bits >> 1) & 1; // ending level bit of this segment
+			int const step  = last - first;    // +1 rises, -1 falls, 0 holds
+			int level = first * 15;
+			for ( int n = 0; n < 16; n++ )
+			{
+				*dest++ = amp_table [level];
+				level += step;
+			}
+		}
+	}
+	
+	output( 0 );
+	volume( 1.0 );
+	reset();
+}
+
+void Ay_Apu::reset()
+{
+	last_time   = 0;
+	noise.delay = 0;
+	noise.lfsr  = 1;
+	
+	// put every tone oscillator back at the shortest period with cleared state
+	for ( int n = 0; n < osc_count; n++ )
+	{
+		osc_t& o = oscs [n];
+		o.period   = period_factor;
+		o.delay    = 0;
+		o.last_amp = 0;
+		o.phase    = 0;
+	}
+	
+	// zero all registers, then apply the two non-zero defaults
+	for ( int r = 0; r < (int) sizeof regs; r++ )
+		regs [r] = 0;
+	regs [7] = 0xFF;      // tone and noise disabled on every channel
+	write_data_( 13, 0 ); // initialize envelope state
+}
+
+// Stores data in register addr (must be < reg_count). A write to register 13
+// restarts the envelope; a tone period write reschedules that oscillator.
+void Ay_Apu::write_data_( int addr, int data )
+{
+	assert( (unsigned) addr < reg_count );
+	
+	#ifdef debug_printf
+		if ( (unsigned) addr >= 14 )
+			debug_printf( "Wrote to I/O port %02X\n", (int) addr );
+	#endif
+	
+	// envelope mode
+	if ( addr == 13 )
+	{
+		data &= 0x0F; // only 4 bits select a shape; e.g. 0x18 would index past the 8 tables
+		if ( !(data & 8) ) // convert modes 0-7 to proper equivalents
+			data = (data & 4) ? 15 : 9;
+		env.wave = env.modes [data - 7];
+		env.pos = -48;
+		env.delay = 0; // will get set to envelope period in run_until()
+	}
+	regs [addr] = data;
+	
+	// handle period changes accurately
+	int i = addr >> 1; // registers 2*i and 2*i+1 form the period of tone osc i
+	if ( i < osc_count )
+	{
+		blip_time_t period = (regs [i * 2 + 1] & 0x0F) * (0x100L * period_factor) +
+				regs [i * 2] * period_factor;
+		if ( !period )
+			period = period_factor; // a register period of 0 runs as period 1
+		
+		// adjust time of next timer expiration based on change in period
+		osc_t& osc = oscs [i];
+		if ( (osc.delay += period - osc.period) < 0 )
+			osc.delay = 0;
+		osc.period = period;
+	}
+	// TODO: same as above for envelope timer, and it also has a divide by two after it
+}
+
+int const noise_off = 0x08; // bit of (regs [7] >> index): noise disabled for that osc
+int const tone_off  = 0x01; // bit of (regs [7] >> index): tone disabled for that osc
+
+void Ay_Apu::run_until( blip_time_t final_end_time ) // emulate all channels from last_time up to final_end_time
+{
+	require( final_end_time >= last_time ); // time must not run backwards
+	
+	// noise period and initial values
+	blip_time_t const noise_period_factor = period_factor * 2; // verified
+	blip_time_t noise_period = (regs [6] & 0x1F) * noise_period_factor;
+	if ( !noise_period )
+		noise_period = noise_period_factor;
+	blip_time_t const old_noise_delay = noise.delay; // every channel replays noise from this saved state
+	blargg_ulong const old_noise_lfsr = noise.lfsr;
+	
+	// envelope period
+	blip_time_t const env_period_factor = period_factor * 2; // verified
+	blip_time_t env_period = (regs [12] * 0x100L + regs [11]) * env_period_factor;
+	if ( !env_period )
+		env_period = env_period_factor; // same as period 1 on my AY chip
+	if ( !env.delay )
+		env.delay = env_period;
+	
+	// run each osc separately
+	for ( int index = 0; index < osc_count; index++ )
+	{
+		osc_t* const osc = &oscs [index];
+		int osc_mode = regs [7] >> index; // bit 0 = tone off, bit 3 = noise off for this channel
+		
+		// output
+		Blip_Buffer* const osc_output = osc->output;
+		if ( !osc_output )
+			continue; // no buffer assigned to this channel, so nothing is generated
+		osc_output->set_modified();
+		
+		// period
+		int half_vol = 0;
+		blip_time_t inaudible_period = (blargg_ulong) (osc_output->clock_rate() +
+				inaudible_freq) / (inaudible_freq * 2);
+		if ( osc->period <= inaudible_period && !(osc_mode & tone_off) )
+		{
+			half_vol = 1; // Actually around 60%, but 50% is close enough
+			osc_mode |= tone_off; // treat the too-fast tone as a constant level
+		}
+		
+		// envelope
+		blip_time_t start_time = last_time;
+		blip_time_t end_time   = final_end_time;
+		int const vol_mode = regs [0x08 + index];
+		int volume = amp_table [vol_mode & 0x0F] >> half_vol;
+		int osc_env_pos = env.pos;
+		if ( vol_mode & 0x10 ) // bit 4 selects the envelope instead of the fixed 4-bit level
+		{
+			volume = env.wave [osc_env_pos] >> half_vol;
+			// use envelope only if it's a repeating wave or a ramp that hasn't finished
+			if ( !(regs [13] & 1) || osc_env_pos < -32 )
+			{
+				end_time = start_time + env.delay; // first segment ends at the next envelope step
+				if ( end_time >= final_end_time )
+					end_time = final_end_time;
+				
+				//if ( !(regs [12] | regs [11]) )
+				//  debug_printf( "Used envelope period 0\n" );
+			}
+			else if ( !volume )
+			{
+				osc_mode = noise_off | tone_off; // silent and static: skip tone and noise output
+			}
+		}
+		else if ( !volume )
+		{
+			osc_mode = noise_off | tone_off; // zero volume: skip tone and noise output
+		}
+		
+		// tone time
+		blip_time_t const period = osc->period;
+		blip_time_t time = start_time + osc->delay;
+		if ( osc_mode & tone_off ) // maintain tone's phase when off
+		{
+			blargg_long count = (final_end_time - time + period - 1) / period; // toggles to skip, rounded up
+			time += count * period;
+			osc->phase ^= count & 1;
+		}
+		
+		// noise time
+		blip_time_t ntime = final_end_time;
+		blargg_ulong noise_lfsr = 1; // bit 0 set, so noise reads as high while it is disabled
+		if ( !(osc_mode & noise_off) )
+		{
+			ntime = start_time + old_noise_delay;
+			noise_lfsr = old_noise_lfsr;
+			//if ( (regs [6] & 0x1F) == 0 )
+			//  debug_printf( "Used noise period 0\n" );
+		}
+		
+		// The following efficiently handles several cases (least demanding first):
+		// * Tone, noise, and envelope disabled, where channel acts as 4-bit DAC
+		// * Just tone or just noise, envelope disabled
+		// * Envelope controlling tone and/or noise
+		// * Tone and noise disabled, envelope enabled with high frequency
+		// * Tone and noise together
+		// * Tone and noise together with envelope
+		
+		// This loop only runs one iteration if envelope is disabled. If envelope
+		// is being used as a waveform (tone and noise disabled), this loop will
+		// still be reasonably efficient since the bulk of it will be skipped.
+		while ( 1 )
+		{
+			// current amplitude: volume when (tone off or phase high) and (noise off or LFSR bit 0 set)
+			int amp = 0;
+			if ( (osc_mode | osc->phase) & 1 & (osc_mode >> 3 | noise_lfsr) )
+				amp = volume;
+			{
+				int delta = amp - osc->last_amp;
+				if ( delta )
+				{
+					osc->last_amp = amp;
+					synth_.offset( start_time, delta, osc_output );
+				}
+			}
+			
+			// Run wave and noise interleaved with each catching up to the other.
+			// If one or both are disabled, their "current time" will be past end time,
+			// so there will be no significant performance hit.
+			if ( ntime < end_time || time < end_time )
+			{
+				// Since amplitude was updated above, delta will always be +/- volume,
+				// so we can avoid using last_amp every time to calculate the delta.
+				int delta = amp * 2 - volume;
+				int delta_non_zero = delta != 0;
+				int phase = osc->phase | (osc_mode & tone_off); assert( tone_off == 0x01 );
+				do
+				{
+					// run noise
+					blip_time_t end = end_time;
+					if ( end_time > time ) end = time;
+					if ( phase & delta_non_zero )
+					{
+						while ( ntime <= end ) // must advance *past* time to avoid hang
+						{
+							int changed = noise_lfsr + 1;
+							noise_lfsr = (-(noise_lfsr & 1) & 0x12000) ^ (noise_lfsr >> 1); // shift right, XOR in 0x12000 if bit 0 was set
+							if ( changed & 2 ) // old bits 0 and 1 differed, so the output bit flips
+							{
+								delta = -delta;
+								synth_.offset( ntime, delta, osc_output );
+							}
+							ntime += noise_period;
+						}
+					}
+					else
+					{
+						// 20 or more noise periods on average for some music
+						blargg_long remain = end - ntime;
+						blargg_long count = remain / noise_period;
+						if ( remain >= 0 )
+							ntime += noise_period + count * noise_period;
+					}
+					
+					// run tone
+					end = end_time;
+					if ( end_time > ntime ) end = ntime;
+					if ( noise_lfsr & delta_non_zero )
+					{
+						while ( time < end )
+						{
+							delta = -delta;
+							synth_.offset( time, delta, osc_output );
+							time += period;
+							//phase ^= 1;
+						}
+						//assert( phase == (delta > 0) );
+						phase = unsigned (-delta) >> (CHAR_BIT * sizeof (unsigned) - 1);
+						// (delta > 0)
+					}
+					else
+					{
+						// loop usually runs less than once
+						//SUB_CASE_COUNTER( (time < end) * (end - time + period - 1) / period );
+						
+						while ( time < end )
+						{
+							time += period;
+							phase ^= 1;
+						}
+					}
+				}
+				while ( time < end_time || ntime < end_time );
+				
+				osc->last_amp = (delta + volume) >> 1; // delta is +/- volume, so this is volume or 0
+				if ( !(osc_mode & tone_off) )
+					osc->phase = phase;
+			}
+			
+			if ( end_time >= final_end_time )
+				break; // breaks first time when envelope is disabled
+			
+			// next envelope step
+			if ( ++osc_env_pos >= 0 )
+				osc_env_pos -= 32; // step back 32 entries so the index stays negative
+			volume = env.wave [osc_env_pos] >> half_vol;
+			
+			start_time = end_time;
+			end_time += env_period;
+			if ( end_time > final_end_time )
+				end_time = final_end_time;
+		}
+		osc->delay = time - final_end_time; // clocks from the new last_time to the next tone toggle
+		
+		if ( !(osc_mode & noise_off) )
+		{
+			noise.delay = ntime - final_end_time;
+			noise.lfsr = noise_lfsr;
+		}
+	}
+	
+	// TODO: optimized saw wave envelope?
+	
+	// maintain envelope phase
+	blip_time_t remain = final_end_time - last_time - env.delay;
+	if ( remain >= 0 )
+	{
+		blargg_long count = (remain + env_period) / env_period; // envelope steps that elapsed
+		env.pos += count;
+		if ( env.pos >= 0 )
+			env.pos = (env.pos & 31) - 32;
+		remain -= count * env_period;
+		assert( -remain <= env_period );
+	}
+	env.delay = -remain;
+	assert( env.delay > 0 );
+	assert( env.pos < 0 );
+	
+	last_time = final_end_time;
+}
diff --git a/libraries/game-music-emu/gme/Ay_Apu.h b/libraries/game-music-emu/gme/Ay_Apu.h
new file mode 100644
index 000000000..ad2d83692
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ay_Apu.h
@@ -0,0 +1,106 @@
+// AY-3-8910 sound chip emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef AY_APU_H
+#define AY_APU_H
+
+#include "blargg_common.h"
+#include "Blip_Buffer.h"
+
+class Ay_Apu {
+public:
+	// Set buffer to generate all sound into, or disable sound if NULL
+	void output( Blip_Buffer* );
+	
+	// Reset sound chip
+	void reset();
+	
+	// Write to register at specified time
+	enum { reg_count = 16 };
+	void write( blip_time_t time, int addr, int data );
+	
+	// Run sound to specified time, end current time frame, then start a new
+	// time frame at time 0. Time frames have no effect on emulation and each
+	// can be whatever length is convenient.
+	void end_frame( blip_time_t length );
+	
+// Additional features
+	
+	// Set sound output of specific oscillator to buffer, where index is
+	// 0, 1, or 2. If buffer is NULL, the specified oscillator is muted.
+	enum { osc_count = 3 };
+	void osc_output( int index, Blip_Buffer* );
+	
+	// Set overall volume (default is 1.0)
+	void volume( double );
+	
+	// Set treble equalization (see documentation)
+	void treble_eq( blip_eq_t const& );
+	
+public:
+	Ay_Apu();
+	typedef unsigned char byte;
+private:
+	struct osc_t
+	{
+		blip_time_t period; // clocks between tone output toggles
+		blip_time_t delay; // clocks from last_time until the next tone toggle
+		short last_amp; // amplitude most recently written to the output buffer
+		short phase; // current tone level, 0 or 1
+		Blip_Buffer* output; // destination buffer, or NULL when the oscillator is muted
+	} oscs [osc_count];
+	blip_time_t last_time; // time that emulation has been run up to in the current frame
+	byte regs [reg_count]; // current chip register values
+	
+	struct {
+		blip_time_t delay; // clocks from last_time until the next noise shift
+		blargg_ulong lfsr; // noise shift register; bit 0 is the noise output
+	} noise;
+	
+	struct {
+		blip_time_t delay; // clocks from last_time until the next envelope step
+		byte const* wave; // current envelope shape, indexed with negative pos (presumably points into modes)
+		int pos; // always negative index into wave
+		byte modes [8] [48]; // values already passed through volume table
+	} env;
+	
+	void run_until( blip_time_t ); // emulate up to given time, which must be >= last_time
+	void write_data_( int addr, int data );
+public:
+	enum { amp_range = 255 }; // divisor used by volume() to scale the synth
+	Blip_Synth<blip_good_quality,1> synth_;
+};
+
+inline void Ay_Apu::volume( double v ) { synth_.volume( 0.7 / osc_count / amp_range * v ); } // per-unit gain: 0.7 * v split across osc_count channels of amp_range each
+
+inline void Ay_Apu::treble_eq( blip_eq_t const& eq ) { synth_.treble_eq( eq ); } // forwards the equalization to the shared synth
+
+inline void Ay_Apu::write( blip_time_t time, int addr, int data )
+{
+	run_until( time ); // generate sound up to the write using the current register values
+	write_data_( addr, data ); // then hand the new value to the register handler
+}
+
+inline void Ay_Apu::osc_output( int index, Blip_Buffer* buffer )
+{
+	assert( 0 <= index && index < osc_count ); // only oscillators 0 .. osc_count - 1 exist
+	oscs [index].output = buffer; // NULL mutes this oscillator
+}
+
+inline void Ay_Apu::output( Blip_Buffer* buf )
+{
+	// point every oscillator at the same buffer (NULL mutes them all)
+	for ( int index = 0; index < osc_count; index++ )
+		osc_output( index, buf );
+}
+
+inline void Ay_Apu::end_frame( blip_time_t time )
+{
+	// catch emulation up to the frame boundary if it has not reached it yet
+	if ( last_time < time ) run_until( time );
+	// rebase the clock so that the next frame starts at time 0
+	assert( time <= last_time );
+	last_time -= time;
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Ay_Cpu.cpp b/libraries/game-music-emu/gme/Ay_Cpu.cpp
new file mode 100644
index 000000000..31c912568
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ay_Cpu.cpp
@@ -0,0 +1,1659 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+/*
+Last validated with zexall 2006.11.21 5:26 PM
+* Doesn't implement the R register or immediate interrupt after EI.
+* Address wrap-around isn't completely correct, but is prevented from crashing emulator.
+*/
+
+#include "Ay_Cpu.h"
+
+#include "blargg_endian.h"
+#include <string.h>
+
+//#include "z80_cpu_log.h"
+
+/* Copyright (C) 2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#define SYNC_TIME()     (void) (s.time = s_time) // copy the local time counter into the state struct
+#define RELOAD_TIME()   (void) (s_time = s.time) // refresh the local time counter from the state struct
+
+// Callbacks to emulator
+
+#define CPU_OUT( cpu, addr, data, TIME )\
+	ay_cpu_out( cpu, TIME, addr, data )
+// port reads are not timestamped: TIME is accepted but not passed on
+#define CPU_IN( cpu, addr, TIME )\
+	ay_cpu_in( cpu, addr )
+
+#include "blargg_source.h"
+
+// flags, named with hex value for clarity
+int const S80 = 0x80; // sign
+int const Z40 = 0x40; // zero
+int const F20 = 0x20; // bit 5, copied through from results
+int const H10 = 0x10; // half carry
+int const F08 = 0x08; // bit 3, copied through from results
+int const V04 = 0x04; // overflow (same bit as parity)
+int const P04 = 0x04; // parity
+int const N02 = 0x02; // set by subtract-type operations
+int const C01 = 0x01; // carry
+// Flag lookups in szpc; entries at index + 0x100 also have C01 set
+#define SZ28P( n )  szpc [n]
+#define SZ28PC( n ) szpc [n]
+#define SZ28C( n )  (szpc [n] & ~P04)
+#define SZ28( n )   SZ28C( n )
+// Accessors for r.r
+#define SET_R( n )  (void) (r.r = n)
+#define GET_R()     (r.r)
+
+Ay_Cpu::Ay_Cpu()
+{
+	state = &state_;
+	// szpc [v] holds the S/F20/F08/parity flags of byte v; szpc [v + 0x100] adds carry
+	for ( int value = 0; value < 0x100; value++ )
+	{
+		int parity = P04; // toggled once per set bit, so it stays set for an even count
+		for ( int bits = value; bits; bits &= bits - 1 )
+			parity ^= P04;
+		int const sz_flags = (value & (S80 | F20 | F08)) | parity;
+		szpc [value] = sz_flags;
+		szpc [value + 0x100] = sz_flags | C01;
+	}
+	szpc [0x000] |= Z40; szpc [0x100] |= Z40; // only a zero byte gets the zero flag
+}
+
+void Ay_Cpu::reset( void* m )
+{
+	// state is expected to point at the member copy here (run() redirects it to a local)
+	check( state == &state_ );
+	state = &state_;
+	
+	// rewind the clock, clear every register, then attach the new memory
+	end_time_ = 0;
+	state_.base = 0;
+	state_.time = 0;
+	memset( &r, 0, sizeof (r) );
+	mem = static_cast<uint8_t*>( m );
+}
+
+#define TIME                        (s_time + s.base)
+#define READ_PROG( addr )           (mem [addr])
+#define INSTR( offset )             READ_PROG( pc + (offset) )
+#define GET_ADDR()                  GET_LE16( &READ_PROG( pc ) )
+#define READ( addr )                READ_PROG( addr )
+#define WRITE( addr, data )         (void) (READ_PROG( addr ) = data)
+#define READ_WORD( addr )           GET_LE16( &READ_PROG( addr ) )
+#define WRITE_WORD( addr, data )    SET_LE16( &READ_PROG( addr ), data )
+#define IN( addr )                  CPU_IN( this, addr, TIME )
+#define OUT( addr, data )           CPU_OUT( this, addr, data, TIME )
+// R8( n, offset ): 8-bit register (n - offset) in r8_; the integer index is computed first so no pointer before r8_ is formed
+#if BLARGG_BIG_ENDIAN
+	#define R8( n, offset ) (r8_ [(n) - (offset)])
+#elif BLARGG_LITTLE_ENDIAN
+	#define R8( n, offset ) (r8_ [((n) ^ 1) - (offset)])
+#else
+	#error "Byte order of CPU must be known"
+#endif
+
+//#define R16( n, shift, offset )   (r16_ [((n) >> shift) - (offset >> shift)])
+
+// byte-offset form of the above; the two offsets are subtracted before being added so the pointer never leaves r16_
+#define R16( n, shift, offset )\
+	(*(uint16_t*) ((char*) r16_ + (((n) >> ((shift) - 1)) - ((offset) >> ((shift) - 1)))))
+
+#define CASE5( a, b, c, d, e          ) case 0x##a:case 0x##b:case 0x##c:case 0x##d:case 0x##e
+#define CASE6( a, b, c, d, e, f       ) CASE5( a, b, c, d, e       ): case 0x##f
+#define CASE7( a, b, c, d, e, f, g    ) CASE6( a, b, c, d, e, f    ): case 0x##g
+#define CASE8( a, b, c, d, e, f, g, h ) CASE7( a, b, c, d, e, f, g ): case 0x##h
+
+// high four bits are $ED time - 8, low four bits are $DD/$FD time - 8
+static byte const ed_dd_timing [0x100] = {
+//0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00, // 0
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00, // 1
+0x00,0x06,0x0C,0x02,0x00,0x00,0x03,0x00,0x00,0x07,0x0C,0x02,0x00,0x00,0x03,0x00, // 2
+0x00,0x00,0x00,0x00,0x0F,0x0F,0x0B,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00, // 3
+0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10, // 4
+0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10, // 5
+0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0xA0,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0xA0, // 6
+0x4B,0x4B,0x7B,0xCB,0x0B,0x6B,0x00,0x0B,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x00, // 7
+0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00, // 8
+0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00, // 9
+0x80,0x80,0x80,0x80,0x00,0x00,0x0B,0x00,0x80,0x80,0x80,0x80,0x00,0x00,0x0B,0x00, // A
+0xD0,0xD0,0xD0,0xD0,0x00,0x00,0x0B,0x00,0xD0,0xD0,0xD0,0xD0,0x00,0x00,0x0B,0x00, // B
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0F,0x00,0x00,0x00,0x00, // C
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, // D
+0x00,0x06,0x00,0x0F,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, // E
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00, // F
+};
+
+bool Ay_Cpu::run( cpu_time_t end_time )
+{
+	set_end_time( end_time );
+	state_t s = this->state_;
+	this->state = &s;
+	bool warning = false;
+	
+	union {
+		regs_t rg;
+		pairs_t rp;
+		uint8_t r8_ [8]; // indexed
+		uint16_t r16_ [4];
+	};
+	rg = this->r.b;
+	
+	cpu_time_t s_time = s.time;
+	uint8_t* const mem = this->mem; // cache
+	uint16_t pc = r.pc;
+	uint16_t sp = r.sp;
+	uint16_t ix = r.ix; // TODO: keep in memory for direct access?
+	uint16_t iy = r.iy;
+	int flags = r.b.flags;
+	
+	goto loop;
+jr_not_taken:
+	s_time -= 5;
+	goto loop;
+call_not_taken:
+	s_time -= 7; 
+jp_not_taken:
+	pc += 2;
+loop:
+	
+	check( (unsigned long) pc < 0x10000 );
+	check( (unsigned long) sp < 0x10000 );
+	check( (unsigned) flags < 0x100 );
+	check( (unsigned) ix < 0x10000 );
+	check( (unsigned) iy < 0x10000 );
+	
+	uint8_t opcode;
+	opcode = READ_PROG( pc );
+	pc++;
+	
+	static byte const base_timing [0x100] = {
+	//   0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+		 4,10, 7, 6, 4, 4, 7, 4, 4,11, 7, 6, 4, 4, 7, 4, // 0
+		13,10, 7, 6, 4, 4, 7, 4,12,11, 7, 6, 4, 4, 7, 4, // 1
+		12,10,16, 6, 4, 4, 7, 4,12,11,16, 6, 4, 4, 7, 4, // 2
+		12,10,13, 6,11,11,10, 4,12,11,13, 6, 4, 4, 7, 4, // 3
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 4
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 5
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 6
+		 7, 7, 7, 7, 7, 7, 4, 7, 4, 4, 4, 4, 4, 4, 7, 4, // 7
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 8
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 9
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // A
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // B
+		11,10,10,10,17,11, 7,11,11,10,10, 8,17,17, 7,11, // C
+		11,10,10,11,17,11, 7,11,11, 4,10,11,17, 8, 7,11, // D
+		11,10,10,19,17,11, 7,11,11, 4,10, 4,17, 8, 7,11, // E
+		11,10,10, 4,17,11, 7,11,11, 6,10, 4,17, 8, 7,11, // F
+	};
+	
+	uint16_t data;
+	data = base_timing [opcode];
+	if ( (s_time += data) >= 0 )
+		goto possibly_out_of_time;
+almost_out_of_time:
+	
+	data = READ_PROG( pc );
+	
+	#ifdef Z80_CPU_LOG_H
+		//log_opcode( opcode, READ_PROG( pc ) );
+		z80_log_regs( rg.a, rp.bc, rp.de, rp.hl, sp, ix, iy );
+		z80_cpu_log( "new", pc - 1, opcode, READ_PROG( pc ),
+				READ_PROG( pc + 1 ), READ_PROG( pc + 2 ) );
+	#endif
+	
+	switch ( opcode )
+	{
+possibly_out_of_time:
+		if ( s_time < (int) data )
+			goto almost_out_of_time;
+		s_time -= data;
+		goto out_of_time;
+
+// Common
+
+	case 0x00: // NOP
+	CASE7( 40, 49, 52, 5B, 64, 6D, 7F ): // LD B,B etc.
+		goto loop;
+	
+	case 0x08:{// EX AF,AF'
+		int temp = r.alt.b.a;
+		r.alt.b.a = rg.a;
+		rg.a = temp;
+		
+		temp = r.alt.b.flags;
+		r.alt.b.flags = flags;
+		flags = temp;
+		goto loop;
+	}
+	
+	case 0xD3: // OUT (imm),A
+		pc++;
+		OUT( data + rg.a * 0x100, rg.a );
+		goto loop;
+		
+	case 0x2E: // LD L,imm
+		pc++;
+		rg.l = data;
+		goto loop;
+	
+	case 0x3E: // LD A,imm
+		pc++;
+		rg.a = data;
+		goto loop;
+	
+	case 0x3A:{// LD A,(addr)
+		uint16_t addr = GET_ADDR();
+		pc += 2;
+		rg.a = READ( addr );
+		goto loop;
+	}
+	
+// Conditional
+
+#define ZERO    (flags & Z40)
+#define CARRY   (flags & C01)
+#define EVEN    (flags & P04)
+#define MINUS   (flags & S80)
+
+// JR
+#define JR( cond ) {\
+	int disp = (int8_t) data;\
+	pc++;\
+	if ( !(cond) )\
+		goto jr_not_taken;\
+	pc += disp;\
+	goto loop;\
+}
+	
+	case 0x20: JR( !ZERO  ) // JR NZ,disp
+	case 0x28: JR(  ZERO  ) // JR Z,disp
+	case 0x30: JR( !CARRY ) // JR NC,disp
+	case 0x38: JR(  CARRY ) // JR C,disp
+	case 0x18: JR(  true  ) // JR disp
+
+	case 0x10:{// DJNZ disp
+		int temp = rg.b - 1;
+		rg.b = temp;
+		JR( temp )
+	}
+	
+// JP
+#define JP( cond )  if ( !(cond) ) goto jp_not_taken; pc = GET_ADDR(); goto loop;
+	
+	case 0xC2: JP( !ZERO  ) // JP NZ,addr
+	case 0xCA: JP(  ZERO  ) // JP Z,addr
+	case 0xD2: JP( !CARRY ) // JP NC,addr
+	case 0xDA: JP(  CARRY ) // JP C,addr
+	case 0xE2: JP( !EVEN  ) // JP PO,addr
+	case 0xEA: JP(  EVEN  ) // JP PE,addr
+	case 0xF2: JP( !MINUS ) // JP P,addr
+	case 0xFA: JP(  MINUS ) // JP M,addr
+	
+	case 0xC3: // JP addr
+		pc = GET_ADDR();
+		goto loop;
+	
+	case 0xE9: // JP HL
+		pc = rp.hl;
+		goto loop;
+
+// RET
+#define RET( cond ) if ( cond ) goto ret_taken; s_time -= 6; goto loop;
+	
+	case 0xC0: RET( !ZERO  ) // RET NZ
+	case 0xC8: RET(  ZERO  ) // RET Z
+	case 0xD0: RET( !CARRY ) // RET NC
+	case 0xD8: RET(  CARRY ) // RET C
+	case 0xE0: RET( !EVEN  ) // RET PO
+	case 0xE8: RET(  EVEN  ) // RET PE
+	case 0xF0: RET( !MINUS ) // RET P
+	case 0xF8: RET(  MINUS ) // RET M
+	
+	case 0xC9: // RET
+	ret_taken:
+		pc = READ_WORD( sp );
+		sp = uint16_t (sp + 2);
+		goto loop;
+	
+// CALL
+#define CALL( cond ) if ( cond ) goto call_taken; goto call_not_taken;
+
+	case 0xC4: CALL( !ZERO  ) // CALL NZ,addr
+	case 0xCC: CALL(  ZERO  ) // CALL Z,addr
+	case 0xD4: CALL( !CARRY ) // CALL NC,addr
+	case 0xDC: CALL(  CARRY ) // CALL C,addr
+	case 0xE4: CALL( !EVEN  ) // CALL PO,addr
+	case 0xEC: CALL(  EVEN  ) // CALL PE,addr
+	case 0xF4: CALL( !MINUS ) // CALL P,addr
+	case 0xFC: CALL(  MINUS ) // CALL M,addr
+	
+	case 0xCD:{// CALL addr
+	call_taken:
+		uint16_t addr = pc + 2;
+		pc = GET_ADDR();
+		sp = uint16_t (sp - 2);
+		WRITE_WORD( sp, addr );
+		goto loop;
+	}
+	
+	case 0xFF: // RST
+		if ( (pc - 1) > 0xFFFF )
+		{
+			pc = uint16_t (pc - 1);
+			s_time -= 11;
+			goto loop;
+		}
+	CASE7( C7, CF, D7, DF, E7, EF, F7 ):
+		data = pc;
+		pc = opcode & 0x38;
+		goto push_data;
+
+// PUSH/POP
+	case 0xF5: // PUSH AF
+		data = rg.a * 0x100u + flags;
+		goto push_data;
+	
+	case 0xC5: // PUSH BC
+	case 0xD5: // PUSH DE
+	case 0xE5: // PUSH HL
+		data = R16( opcode, 4, 0xC5 );
+	push_data:
+		sp = uint16_t (sp - 2);
+		WRITE_WORD( sp, data );
+		goto loop;
+	
+	case 0xF1: // POP AF
+		flags = READ( sp );
+		rg.a = READ( sp + 1 );
+		sp = uint16_t (sp + 2);
+		goto loop;
+	
+	case 0xC1: // POP BC
+	case 0xD1: // POP DE
+	case 0xE1: // POP HL
+		R16( opcode, 4, 0xC1 ) = READ_WORD( sp );
+		sp = uint16_t (sp + 2);
+		goto loop;
+	
+// ADC/ADD/SBC/SUB
+	case 0x96: // SUB (HL)
+	case 0x86: // ADD (HL)
+		flags &= ~C01;
+	case 0x9E: // SBC (HL)
+	case 0x8E: // ADC (HL)
+		data = READ( rp.hl );
+		goto adc_data;
+	
+	case 0xD6: // SUB A,imm
+	case 0xC6: // ADD imm
+		flags &= ~C01;
+	case 0xDE: // SBC A,imm
+	case 0xCE: // ADC imm
+		pc++;
+		goto adc_data;
+	
+	CASE7( 90, 91, 92, 93, 94, 95, 97 ): // SUB r
+	CASE7( 80, 81, 82, 83, 84, 85, 87 ): // ADD r
+		flags &= ~C01;
+	CASE7( 98, 99, 9A, 9B, 9C, 9D, 9F ): // SBC r
+	CASE7( 88, 89, 8A, 8B, 8C, 8D, 8F ): // ADC r
+		data = R8( opcode & 7, 0 );
+	adc_data: {
+		int result = data + (flags & C01);
+		data ^= rg.a;
+		flags = opcode >> 3 & N02; // bit 4 is set in subtract opcodes
+		if ( flags )
+			result = -result;
+		result += rg.a;
+		data ^= result;
+		flags |=(data & H10) |
+				((data - -0x80) >> 6 & V04) |
+				SZ28C( result & 0x1FF );
+		rg.a = result;
+		goto loop;
+	}
+
+// CP
+	case 0xBE: // CP (HL)
+		data = READ( rp.hl );
+		goto cp_data;
+	
+	case 0xFE: // CP imm
+		pc++;
+		goto cp_data;
+	
+	CASE7( B8, B9, BA, BB, BC, BD, BF ): // CP r
+		data = R8( opcode, 0xB8 );
+	cp_data: {
+		int result = rg.a - data;
+		flags = N02 | (data & (F20 | F08)) | (result >> 8 & C01);
+		data ^= rg.a;
+		flags |=(((result ^ rg.a) & data) >> 5 & V04) |
+				(((data & H10) ^ result) & (S80 | H10));
+		if ( (uint8_t) result )
+			goto loop;
+		flags |= Z40;
+		goto loop;
+	}
+	
+// ADD HL,rp
+	
+	case 0x39: // ADD HL,SP
+		data = sp;
+		goto add_hl_data;
+	
+	case 0x09: // ADD HL,BC
+	case 0x19: // ADD HL,DE
+	case 0x29: // ADD HL,HL
+		data = R16( opcode, 4, 0x09 );
+	add_hl_data: {
+		blargg_ulong sum = rp.hl + data;
+		data ^= rp.hl;
+		rp.hl = sum;
+		flags = (flags & (S80 | Z40 | V04)) |
+				(sum >> 16) |
+				(sum >> 8 & (F20 | F08)) |
+				((data ^ sum) >> 8 & H10);
+		goto loop;
+	}
+	
+	case 0x27:{// DAA
+		int a = rg.a;
+		if ( a > 0x99 )
+			flags |= C01;
+		
+		int adjust = 0x60 & -(flags & C01);
+		
+		if ( flags & H10 || (a & 0x0F) > 9 )
+			adjust |= 0x06;
+		
+		if ( flags & N02 )
+			adjust = -adjust;
+		a += adjust;
+		
+		flags = (flags & (C01 | N02)) |
+				((rg.a ^ a) & H10) |
+				SZ28P( (uint8_t) a );
+		rg.a = a;
+		goto loop;
+	}
+	/*
+	case 0x27:{// DAA
+		// more optimized, but probably not worth the obscurity
+		int f = (rg.a + (0xFF - 0x99)) >> 8 | flags; // (a > 0x99 ? C01 : 0) | flags
+		int adjust = 0x60 & -(f & C01); // f & C01 ? 0x60 : 0
+		
+		if ( (((rg.a + (0x0F - 9)) ^ rg.a) | f) & H10 ) // flags & H10 || (rg.a & 0x0F) > 9
+			adjust |= 0x06;
+		
+		if ( f & N02 )
+			adjust = -adjust;
+		int a = rg.a + adjust;
+		
+		flags = (f & (N02 | C01)) | ((rg.a ^ a) & H10) | SZ28P( (uint8_t) a );
+		rg.a = a;
+		goto loop;
+	}
+	*/
+	
+// INC/DEC
+	case 0x34: // INC (HL)
+		data = READ( rp.hl ) + 1;
+		WRITE( rp.hl, data );
+		goto inc_set_flags;
+	
+	CASE7( 04, 0C, 14, 1C, 24, 2C, 3C ): // INC r
+		data = ++R8( opcode >> 3, 0 );
+	inc_set_flags:
+		flags = (flags & C01) |
+				(((data & 0x0F) - 1) & H10) |
+				SZ28( (uint8_t) data );
+		if ( data != 0x80 )
+			goto loop;
+		flags |= V04;
+		goto loop;
+	
+	case 0x35: // DEC (HL)
+		data = READ( rp.hl ) - 1;
+		WRITE( rp.hl, data );
+		goto dec_set_flags;
+	
+	CASE7( 05, 0D, 15, 1D, 25, 2D, 3D ): // DEC r
+		data = --R8( opcode >> 3, 0 );
+	dec_set_flags:
+		flags = (flags & C01) | N02 |
+				(((data & 0x0F) + 1) & H10) |
+				SZ28( (uint8_t) data );
+		if ( data != 0x7F )
+			goto loop;
+		flags |= V04;
+		goto loop;
+
+	case 0x03: // INC BC
+	case 0x13: // INC DE
+	case 0x23: // INC HL
+		R16( opcode, 4, 0x03 )++;
+		goto loop;
+	
+	case 0x33: // INC SP
+		sp = uint16_t (sp + 1);
+		goto loop;
+	
+	case 0x0B: // DEC BC
+	case 0x1B: // DEC DE
+	case 0x2B: // DEC HL
+		R16( opcode, 4, 0x0B )--;
+		goto loop;
+	
+	case 0x3B: // DEC SP
+		sp = uint16_t (sp - 1);
+		goto loop;
+	
+// AND
+	case 0xA6: // AND (HL)
+		data = READ( rp.hl );
+		goto and_data;
+	
+	case 0xE6: // AND imm
+		pc++;
+		goto and_data;
+	
+	CASE7( A0, A1, A2, A3, A4, A5, A7 ): // AND r
+		data = R8( opcode, 0xA0 );
+	and_data:
+		rg.a &= data;
+		flags = SZ28P( rg.a ) | H10;
+		goto loop;
+	
+// OR
+	case 0xB6: // OR (HL)
+		data = READ( rp.hl );
+		goto or_data;
+	
+	case 0xF6: // OR imm
+		pc++;
+		goto or_data;
+	
+	CASE7( B0, B1, B2, B3, B4, B5, B7 ): // OR r
+		data = R8( opcode, 0xB0 );
+	or_data:
+		rg.a |= data;
+		flags = SZ28P( rg.a );
+		goto loop;
+
+// XOR
+	case 0xAE: // XOR (HL)
+		data = READ( rp.hl );
+		goto xor_data;
+	
+	case 0xEE: // XOR imm
+		pc++;
+		goto xor_data;
+	
+	CASE7( A8, A9, AA, AB, AC, AD, AF ): // XOR r
+		data = R8( opcode, 0xA8 );
+	xor_data:
+		rg.a ^= data;
+		flags = SZ28P( rg.a );
+		goto loop;
+
+// LD
+	CASE7( 70, 71, 72, 73, 74, 75, 77 ): // LD (HL),r
+		WRITE( rp.hl, R8( opcode, 0x70 ) );
+		goto loop;
+	
+	CASE6( 41, 42, 43, 44, 45, 47 ): // LD B,r
+	CASE6( 48, 4A, 4B, 4C, 4D, 4F ): // LD C,r
+	CASE6( 50, 51, 53, 54, 55, 57 ): // LD D,r
+	CASE6( 58, 59, 5A, 5C, 5D, 5F ): // LD E,r
+	CASE6( 60, 61, 62, 63, 65, 67 ): // LD H,r
+	CASE6( 68, 69, 6A, 6B, 6C, 6F ): // LD L,r
+	CASE6( 78, 79, 7A, 7B, 7C, 7D ): // LD A,r
+		R8( opcode >> 3 & 7, 0 ) = R8( opcode & 7, 0 );
+		goto loop;
+	
+	CASE5( 06, 0E, 16, 1E, 26 ): // LD r,imm
+		R8( opcode >> 3, 0 ) = data;
+		pc++;
+		goto loop;
+	
+	case 0x36: // LD (HL),imm
+		pc++;
+		WRITE( rp.hl, data );
+		goto loop;
+	
+	CASE7( 46, 4E, 56, 5E, 66, 6E, 7E ): // LD r,(HL)
+		R8( opcode >> 3, 8 ) = READ( rp.hl );
+		goto loop;
+	
+	case 0x01: // LD rp,imm
+	case 0x11:
+	case 0x21:
+		R16( opcode, 4, 0x01 ) = GET_ADDR();
+		pc += 2;
+		goto loop;
+	
+	case 0x31: // LD sp,imm
+		sp = GET_ADDR();
+		pc += 2;
+		goto loop;
+	
+	case 0x2A:{// LD HL,(addr)
+		uint16_t addr = GET_ADDR();
+		pc += 2;
+		rp.hl = READ_WORD( addr );
+		goto loop;
+	}
+	
+	case 0x32:{// LD (addr),A
+		uint16_t addr = GET_ADDR();
+		pc += 2;
+		WRITE( addr, rg.a );
+		goto loop;
+	}
+	
+	case 0x22:{// LD (addr),HL
+		uint16_t addr = GET_ADDR();
+		pc += 2;
+		WRITE_WORD( addr, rp.hl );
+		goto loop;
+	}
+	
+	case 0x02: // LD (BC),A
+	case 0x12: // LD (DE),A
+		WRITE( R16( opcode, 4, 0x02 ), rg.a );
+		goto loop;
+	
+	case 0x0A: // LD A,(BC)
+	case 0x1A: // LD A,(DE)
+		rg.a = READ( R16( opcode, 4, 0x0A ) );
+		goto loop;
+	
+	case 0xF9: // LD SP,HL
+		sp = rp.hl;
+		goto loop;
+	
+// Rotate
+	
+	case 0x07:{// RLCA
+		uint16_t temp = rg.a;
+		temp = (temp << 1) | (temp >> 7);
+		flags = (flags & (S80 | Z40 | P04)) |
+				(temp & (F20 | F08 | C01));
+		rg.a = temp;
+		goto loop;
+	}
+	
+	case 0x0F:{// RRCA
+		uint16_t temp = rg.a;
+		flags = (flags & (S80 | Z40 | P04)) |
+				(temp & C01);
+		temp = (temp << 7) | (temp >> 1);
+		flags |= temp & (F20 | F08);
+		rg.a = temp;
+		goto loop;
+	}
+	
+	case 0x17:{// RLA
+		blargg_ulong temp = (rg.a << 1) | (flags & C01);
+		flags = (flags & (S80 | Z40 | P04)) |
+				(temp & (F20 | F08)) |
+				(temp >> 8);
+		rg.a = temp;
+		goto loop;
+	}
+	
+	case 0x1F:{// RRA
+		uint16_t temp = (flags << 7) | (rg.a >> 1);
+		flags = (flags & (S80 | Z40 | P04)) |
+				(temp & (F20 | F08)) |
+				(rg.a & C01);
+		rg.a = temp;
+		goto loop;
+	}
+	
+// Misc
+	case 0x2F:{// CPL
+		uint16_t temp = ~rg.a;
+		flags = (flags & (S80 | Z40 | P04 | C01)) |
+				(temp & (F20 | F08)) |
+				(H10 | N02);
+		rg.a = temp;
+		goto loop;
+	}
+	
+	case 0x3F:{// CCF
+		flags = ((flags & (S80 | Z40 | P04 | C01)) ^ C01) |
+				(flags << 4 & H10) |
+				(rg.a & (F20 | F08));
+		goto loop;
+	}
+	
+	case 0x37: // SCF
+		flags = (flags & (S80 | Z40 | P04)) | C01 |
+				(rg.a & (F20 | F08));
+		goto loop;
+	
+	case 0xDB: // IN A,(imm)
+		pc++;
+		rg.a = IN( data + rg.a * 0x100 );
+		goto loop;
+
+	case 0xE3:{// EX (SP),HL
+		uint16_t temp = READ_WORD( sp );
+		WRITE_WORD( sp, rp.hl );
+		rp.hl = temp;
+		goto loop;
+	}
+	
+	case 0xEB:{// EX DE,HL
+		uint16_t temp = rp.hl;
+		rp.hl = rp.de;
+		rp.de = temp;
+		goto loop;
+	}
+	
+	case 0xD9:{// EXX DE,HL
+		uint16_t temp = r.alt.w.bc;
+		r.alt.w.bc = rp.bc;
+		rp.bc = temp;
+		
+		temp = r.alt.w.de;
+		r.alt.w.de = rp.de;
+		rp.de = temp;
+		
+		temp = r.alt.w.hl;
+		r.alt.w.hl = rp.hl;
+		rp.hl = temp;
+		goto loop;
+	}
+	
+	case 0xF3: // DI
+		r.iff1 = 0;
+		r.iff2 = 0;
+		goto loop;
+	
+	case 0xFB: // EI
+		r.iff1 = 1;
+		r.iff2 = 1;
+		// TODO: delayed effect
+		goto loop;
+	
+	case 0x76: // HALT
+		goto halt;
+	
+//////////////////////////////////////// CB prefix
+	{
+	case 0xCB:
+		unsigned data2;
+		data2 = INSTR( 1 );
+		(void) data2; // TODO is this the same as data in all cases?
+		pc++;
+		switch ( data )
+		{
+	
+	// Rotate left
+		
+	#define RLC( read, write ) {\
+		uint8_t result = read;\
+		result = uint8_t (result << 1) | (result >> 7);\
+		flags = SZ28P( result ) | (result & C01);\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x06: // RLC (HL)
+			s_time += 7;
+			data = rp.hl;
+		rlc_data_addr:
+			RLC( READ( data ), WRITE( data, result ) )
+		
+		CASE7( 00, 01, 02, 03, 04, 05, 07 ):{// RLC r
+			uint8_t& reg = R8( data, 0 );
+			RLC( reg, reg = result )
+		}
+		
+	#define RL( read, write ) {\
+		uint16_t result = (read << 1) | (flags & C01);\
+		flags = SZ28PC( result );\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x16: // RL (HL)
+			s_time += 7;
+			data = rp.hl;
+		rl_data_addr:
+			RL( READ( data ), WRITE( data, result ) )
+		
+		CASE7( 10, 11, 12, 13, 14, 15, 17 ):{// RL r
+			uint8_t& reg = R8( data, 0x10 );
+			RL( reg, reg = result )
+		}
+		
+	#define SLA( read, add, write ) {\
+		uint16_t result = (read << 1) | add;\
+		flags = SZ28PC( result );\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x26: // SLA (HL)
+			s_time += 7;
+			data = rp.hl;
+		sla_data_addr:
+			SLA( READ( data ), 0, WRITE( data, result ) )
+		
+		CASE7( 20, 21, 22, 23, 24, 25, 27 ):{// SLA r
+			uint8_t& reg = R8( data, 0x20 );
+			SLA( reg, 0, reg = result )
+		}
+		
+		case 0x36: // SLL (HL)
+			s_time += 7;
+			data = rp.hl;
+		sll_data_addr:
+			SLA( READ( data ), 1, WRITE( data, result ) )
+		
+		CASE7( 30, 31, 32, 33, 34, 35, 37 ):{// SLL r
+			uint8_t& reg = R8( data, 0x30 );
+			SLA( reg, 1, reg = result )
+		}
+		
+	// Rotate right
+		
+	#define RRC( read, write ) {\
+		uint8_t result = read;\
+		flags = result & C01;\
+		result = uint8_t (result << 7) | (result >> 1);\
+		flags |= SZ28P( result );\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x0E: // RRC (HL)
+			s_time += 7;
+			data = rp.hl;
+		rrc_data_addr:
+			RRC( READ( data ), WRITE( data, result ) )
+		
+		CASE7( 08, 09, 0A, 0B, 0C, 0D, 0F ):{// RRC r
+			uint8_t& reg = R8( data, 0x08 );
+			RRC( reg, reg = result )
+		}
+		
+	#define RR( read, write ) {\
+		uint8_t result = read;\
+		uint8_t temp = result & C01;\
+		result = uint8_t (flags << 7) | (result >> 1);\
+		flags = SZ28P( result ) | temp;\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x1E: // RR (HL)
+			s_time += 7;
+			data = rp.hl;
+		rr_data_addr:
+			RR( READ( data ), WRITE( data, result ) )
+		
+		CASE7( 18, 19, 1A, 1B, 1C, 1D, 1F ):{// RR r
+			uint8_t& reg = R8( data, 0x18 );
+			RR( reg, reg = result )
+		}
+		
+	#define SRA( read, write ) {\
+		uint8_t result = read;\
+		flags = result & C01;\
+		result = (result & 0x80) | (result >> 1);\
+		flags |= SZ28P( result );\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x2E: // SRA (HL)
+			data = rp.hl;
+			s_time += 7;
+		sra_data_addr:
+			SRA( READ( data ), WRITE( data, result ) )
+		
+		CASE7( 28, 29, 2A, 2B, 2C, 2D, 2F ):{// SRA r
+			uint8_t& reg = R8( data, 0x28 );
+			SRA( reg, reg = result )
+		}
+		
+	#define SRL( read, write ) {\
+		uint8_t result = read;\
+		flags = result & C01;\
+		result >>= 1;\
+		flags |= SZ28P( result );\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x3E: // SRL (HL)
+			s_time += 7;
+			data = rp.hl;
+		srl_data_addr:
+			SRL( READ( data ), WRITE( data, result ) )
+		
+		CASE7( 38, 39, 3A, 3B, 3C, 3D, 3F ):{// SRL r
+			uint8_t& reg = R8( data, 0x38 );
+			SRL( reg, reg = result )
+		}
+		
+	// BIT
+		{
+			unsigned temp;
+		CASE8( 46, 4E, 56, 5E, 66, 6E, 76, 7E ): // BIT b,(HL)
+			s_time += 4;
+			temp = READ( rp.hl );
+			flags &= C01;
+			goto bit_temp;
+		CASE7( 40, 41, 42, 43, 44, 45, 47 ): // BIT 0,r
+		CASE7( 48, 49, 4A, 4B, 4C, 4D, 4F ): // BIT 1,r
+		CASE7( 50, 51, 52, 53, 54, 55, 57 ): // BIT 2,r
+		CASE7( 58, 59, 5A, 5B, 5C, 5D, 5F ): // BIT 3,r
+		CASE7( 60, 61, 62, 63, 64, 65, 67 ): // BIT 4,r
+		CASE7( 68, 69, 6A, 6B, 6C, 6D, 6F ): // BIT 5,r
+		CASE7( 70, 71, 72, 73, 74, 75, 77 ): // BIT 6,r
+		CASE7( 78, 79, 7A, 7B, 7C, 7D, 7F ): // BIT 7,r
+			temp = R8( data & 7, 0 );
+			flags = (flags & C01) | (temp & (F20 | F08));
+		bit_temp:
+			int masked = temp & 1 << (data >> 3 & 7);
+			flags |=(masked & S80) | H10 |
+					((masked - 1) >> 8 & (Z40 | P04));
+			goto loop;
+		}
+		
+	// SET/RES
+		CASE8( 86, 8E, 96, 9E, A6, AE, B6, BE ): // RES b,(HL)
+		CASE8( C6, CE, D6, DE, E6, EE, F6, FE ):{// SET b,(HL)
+			s_time += 7;
+			int temp = READ( rp.hl );
+			int bit = 1 << (data >> 3 & 7);
+			temp |= bit; // SET
+			if ( !(data & 0x40) )
+				temp ^= bit; // RES
+			WRITE( rp.hl, temp );
+			goto loop;
+		}
+		
+		CASE7( C0, C1, C2, C3, C4, C5, C7 ): // SET 0,r
+		CASE7( C8, C9, CA, CB, CC, CD, CF ): // SET 1,r
+		CASE7( D0, D1, D2, D3, D4, D5, D7 ): // SET 2,r
+		CASE7( D8, D9, DA, DB, DC, DD, DF ): // SET 3,r
+		CASE7( E0, E1, E2, E3, E4, E5, E7 ): // SET 4,r
+		CASE7( E8, E9, EA, EB, EC, ED, EF ): // SET 5,r
+		CASE7( F0, F1, F2, F3, F4, F5, F7 ): // SET 6,r
+		CASE7( F8, F9, FA, FB, FC, FD, FF ): // SET 7,r
+			R8( data & 7, 0 ) |= 1 << (data >> 3 & 7);
+			goto loop;
+		
+		CASE7( 80, 81, 82, 83, 84, 85, 87 ): // RES 0,r
+		CASE7( 88, 89, 8A, 8B, 8C, 8D, 8F ): // RES 1,r
+		CASE7( 90, 91, 92, 93, 94, 95, 97 ): // RES 2,r
+		CASE7( 98, 99, 9A, 9B, 9C, 9D, 9F ): // RES 3,r
+		CASE7( A0, A1, A2, A3, A4, A5, A7 ): // RES 4,r
+		CASE7( A8, A9, AA, AB, AC, AD, AF ): // RES 5,r
+		CASE7( B0, B1, B2, B3, B4, B5, B7 ): // RES 6,r
+		CASE7( B8, B9, BA, BB, BC, BD, BF ): // RES 7,r
+			R8( data & 7, 0 ) &= ~(1 << (data >> 3 & 7));
+			goto loop;
+		}
+		assert( false );
+	}
+
+//////////////////////////////////////// ED prefix
+	{
+	case 0xED:
+		pc++;
+		s_time += ed_dd_timing [data] >> 4;
+		switch ( data )
+		{
+		{
+			blargg_ulong temp;
+		case 0x72: // SBC HL,SP
+		case 0x7A: // ADC HL,SP
+			temp = sp;
+			if ( 0 )
+		case 0x42: // SBC HL,BC
+		case 0x52: // SBC HL,DE
+		case 0x62: // SBC HL,HL
+		case 0x4A: // ADC HL,BC
+		case 0x5A: // ADC HL,DE
+		case 0x6A: // ADC HL,HL
+				temp = R16( data >> 3 & 6, 1, 0 );
+			blargg_ulong sum = temp + (flags & C01);
+			flags = ~data >> 2 & N02;
+			if ( flags )
+				sum = -sum;
+			sum += rp.hl;
+			temp ^= rp.hl;
+			temp ^= sum;
+			flags |=(sum >> 16 & C01) |
+					(temp >> 8 & H10) |
+					(sum >> 8 & (S80 | F20 | F08)) |
+					((temp - -0x8000) >> 14 & V04);
+			rp.hl = sum;
+			if ( (uint16_t) sum )
+				goto loop;
+			flags |= Z40;
+			goto loop;
+		}
+		
+		CASE8( 40, 48, 50, 58, 60, 68, 70, 78 ):{// IN r,(C)
+			int temp = IN( rp.bc );
+			R8( data >> 3, 8 ) = temp;
+			flags = (flags & C01) | SZ28P( temp );
+			goto loop;
+		}
+		
+		case 0x71: // OUT (C),0
+			rg.flags = 0;
+		CASE7( 41, 49, 51, 59, 61, 69, 79 ): // OUT (C),r
+			OUT( rp.bc, R8( data >> 3, 8 ) );
+			goto loop;
+		
+		{
+			unsigned temp;
+		case 0x73: // LD (ADDR),SP
+			temp = sp;
+			if ( 0 )
+		case 0x43: // LD (ADDR),BC
+		case 0x53: // LD (ADDR),DE
+				temp = R16( data, 4, 0x43 );
+			uint16_t addr = GET_ADDR();
+			pc += 2;
+			WRITE_WORD( addr, temp );
+			goto loop;
+		}
+		
+		case 0x4B: // LD BC,(ADDR)
+		case 0x5B:{// LD DE,(ADDR)
+			uint16_t addr = GET_ADDR();
+			pc += 2;
+			R16( data, 4, 0x4B ) = READ_WORD( addr );
+			goto loop;
+		}
+		
+		case 0x7B:{// LD SP,(ADDR)
+			uint16_t addr = GET_ADDR();
+			pc += 2;
+			sp = READ_WORD( addr );
+			goto loop;
+		}
+		
+		case 0x67:{// RRD
+			uint8_t temp = READ( rp.hl );
+			WRITE( rp.hl, (rg.a << 4) | (temp >> 4) );
+			temp = (rg.a & 0xF0) | (temp & 0x0F);
+			flags = (flags & C01) | SZ28P( temp );
+			rg.a = temp;
+			goto loop;
+		}
+		
+		case 0x6F:{// RLD
+			uint8_t temp = READ( rp.hl );
+			WRITE( rp.hl, (temp << 4) | (rg.a & 0x0F) );
+			temp = (rg.a & 0xF0) | (temp >> 4);
+			flags = (flags & C01) | SZ28P( temp );
+			rg.a = temp;
+			goto loop;
+		}
+		
+		CASE8( 44, 4C, 54, 5C, 64, 6C, 74, 7C ): // NEG
+			opcode = 0x10; // flag to do SBC instead of ADC
+			flags &= ~C01;
+			data = rg.a;
+			rg.a = 0;
+			goto adc_data;
+		
+		{
+			int inc;
+		case 0xA9: // CPD
+		case 0xB9: // CPDR
+			inc = -1;
+			if ( 0 )
+		case 0xA1: // CPI
+		case 0xB1: // CPIR
+				inc = +1;
+			uint16_t addr = rp.hl;
+			rp.hl = addr + inc;
+			int temp = READ( addr );
+			
+			int result = rg.a - temp;
+			flags = (flags & C01) | N02 |
+					((((temp ^ rg.a) & H10) ^ result) & (S80 | H10));
+			
+			if ( !(uint8_t) result ) flags |= Z40;
+			result -= (flags & H10) >> 4;
+			flags |= result & F08;
+			flags |= result << 4 & F20;
+			if ( !--rp.bc )
+				goto loop;
+			
+			flags |= V04;
+			if ( flags & Z40 || data < 0xB0 )
+				goto loop;
+			
+			pc -= 2;
+			s_time += 5;
+			goto loop;
+		}
+		
+		{
+			int inc;
+		case 0xA8: // LDD
+		case 0xB8: // LDDR
+			inc = -1;
+			if ( 0 )
+		case 0xA0: // LDI
+		case 0xB0: // LDIR
+				inc = +1;
+			uint16_t addr = rp.hl;
+			rp.hl = addr + inc;
+			int temp = READ( addr );
+			
+			addr = rp.de;
+			rp.de = addr + inc;
+			WRITE( addr, temp );
+			
+			temp += rg.a;
+			flags = (flags & (S80 | Z40 | C01)) |
+					(temp & F08) | (temp << 4 & F20);
+			if ( !--rp.bc )
+				goto loop;
+			
+			flags |= V04;
+			if ( data < 0xB0 )
+				goto loop;
+			
+			pc -= 2;
+			s_time += 5;
+			goto loop;
+		}
+		
+		{
+			int inc;
+		case 0xAB: // OUTD
+		case 0xBB: // OTDR
+			inc = -1;
+			if ( 0 )
+		case 0xA3: // OUTI
+		case 0xB3: // OTIR
+				inc = +1;
+			uint16_t addr = rp.hl;
+			rp.hl = addr + inc;
+			int temp = READ( addr );
+			
+			int b = --rg.b;
+			flags = (temp >> 6 & N02) | SZ28( b );
+			if ( b && data >= 0xB0 )
+			{
+				pc -= 2;
+				s_time += 5;
+			}
+			
+			OUT( rp.bc, temp );
+			goto loop;
+		}
+		
+		{
+			int inc;
+		case 0xAA: // IND
+		case 0xBA: // INDR
+			inc = -1;
+			if ( 0 )
+		case 0xA2: // INI
+		case 0xB2: // INIR
+				inc = +1;
+			
+			uint16_t addr = rp.hl;
+			rp.hl = addr + inc;
+			
+			int temp = IN( rp.bc );
+			
+			int b = --rg.b;
+			flags = (temp >> 6 & N02) | SZ28( b );
+			if ( b && data >= 0xB0 )
+			{
+				pc -= 2;
+				s_time += 5;
+			}
+			
+			WRITE( addr, temp );
+			goto loop;
+		}
+		
+		case 0x47: // LD I,A
+			r.i = rg.a;
+			goto loop;
+		
+		case 0x4F: // LD R,A
+			SET_R( rg.a );
+			debug_printf( "LD R,A not supported\n" );
+			warning = true;
+			goto loop;
+		
+		case 0x57: // LD A,I
+			rg.a = r.i;
+			goto ld_ai_common;
+		
+		case 0x5F: // LD A,R
+			rg.a = GET_R();
+			debug_printf( "LD A,R not supported\n" );
+			warning = true;
+		ld_ai_common:
+			flags = (flags & C01) | SZ28( rg.a ) | (r.iff2 << 2 & V04);
+			goto loop;
+		
+		CASE8( 45, 4D, 55, 5D, 65, 6D, 75, 7D ): // RETI/RETN
+			r.iff1 = r.iff2;
+			goto ret_taken;
+		
+		case 0x46: case 0x4E: case 0x66: case 0x6E: // IM 0
+			r.im = 0;
+			goto loop;
+		
+		case 0x56: case 0x76: // IM 1
+			r.im = 1;
+			goto loop;
+		
+		case 0x5E: case 0x7E: // IM 2
+			r.im = 2;
+			goto loop;
+		
+		default:
+			debug_printf( "Opcode $ED $%02X not supported\n", data );
+			warning = true;
+			goto loop;
+		}
+		assert( false );
+	}
+
+//////////////////////////////////////// DD/FD prefix
+	{
+	uint16_t ixy;
+	case 0xDD:
+		ixy = ix;
+		goto ix_prefix;
+	case 0xFD:
+		ixy = iy;
+	ix_prefix:
+		pc++;
+		unsigned data2 = READ_PROG( pc );
+		s_time += ed_dd_timing [data] & 0x0F;
+		switch ( data )
+		{
+	// TODO: more efficient way of avoiding negative address
+	#define IXY_DISP( ixy, disp )   uint16_t ((ixy) + (disp))
+	
+	#define SET_IXY( in ) if ( opcode == 0xDD ) ix = in; else iy = in;
+	
+	// ADD/ADC/SUB/SBC
+	
+		case 0x96: // SUB (IXY+disp)
+		case 0x86: // ADD (IXY+disp)
+			flags &= ~C01;
+		case 0x9E: // SBC (IXY+disp)
+		case 0x8E: // ADC (IXY+disp)
+			pc++;
+			opcode = data;
+			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
+			goto adc_data;
+		
+		case 0x94: // SUB HXY
+		case 0x84: // ADD HXY
+			flags &= ~C01;
+		case 0x9C: // SBC HXY
+		case 0x8C: // ADC HXY
+			opcode = data;
+			data = ixy >> 8;
+			goto adc_data;
+		
+		case 0x95: // SUB LXY
+		case 0x85: // ADD LXY
+			flags &= ~C01;
+		case 0x9D: // SBC LXY
+		case 0x8D: // ADC LXY
+			opcode = data;
+			data = (uint8_t) ixy;
+			goto adc_data;
+		
+		{
+			unsigned temp;
+		case 0x39: // ADD IXY,SP
+			temp = sp;
+			goto add_ixy_data;
+		
+		case 0x29: // ADD IXY,HL
+			temp = ixy;
+			goto add_ixy_data;
+		
+		case 0x09: // ADD IXY,BC
+		case 0x19: // ADD IXY,DE
+			temp = R16( data, 4, 0x09 );
+		add_ixy_data: {
+			blargg_ulong sum = ixy + temp;
+			temp ^= ixy;
+			ixy = (uint16_t) sum;
+			flags = (flags & (S80 | Z40 | V04)) |
+					(sum >> 16) |
+					(sum >> 8 & (F20 | F08)) |
+					((temp ^ sum) >> 8 & H10);
+			goto set_ixy;
+		}
+		}
+	
+	// AND
+		case 0xA6: // AND (IXY+disp)
+			pc++;
+			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
+			goto and_data;
+		
+		case 0xA4: // AND HXY
+			data = ixy >> 8;
+			goto and_data;
+		
+		case 0xA5: // AND LXY
+			data = (uint8_t) ixy;
+			goto and_data;
+	
+	// OR
+		case 0xB6: // OR (IXY+disp)
+			pc++;
+			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
+			goto or_data;
+		
+		case 0xB4: // OR HXY
+			data = ixy >> 8;
+			goto or_data;
+		
+		case 0xB5: // OR LXY
+			data = (uint8_t) ixy;
+			goto or_data;
+	
+	// XOR
+		case 0xAE: // XOR (IXY+disp)
+			pc++;
+			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
+			goto xor_data;
+		
+		case 0xAC: // XOR HXY
+			data = ixy >> 8;
+			goto xor_data;
+		
+		case 0xAD: // XOR LXY
+			data = (uint8_t) ixy;
+			goto xor_data;
+	
+	// CP
+		case 0xBE: // CP (IXY+disp)
+			pc++;
+			data = READ( IXY_DISP( ixy, (int8_t) data2 )  );
+			goto cp_data;
+		
+		case 0xBC: // CP HXY
+			data = ixy >> 8;
+			goto cp_data;
+		
+		case 0xBD: // CP LXY
+			data = (uint8_t) ixy;
+			goto cp_data;
+		
+	// LD
+		CASE7( 70, 71, 72, 73, 74, 75, 77 ): // LD (IXY+disp),r
+			data = R8( data, 0x70 );
+			if ( 0 )
+		case 0x36: // LD (IXY+disp),imm
+				pc++, data = READ_PROG( pc );
+			pc++;
+			WRITE( IXY_DISP( ixy, (int8_t) data2 ), data );
+			goto loop;
+
+		CASE5( 44, 4C, 54, 5C, 7C ): // LD r,HXY
+			R8( data >> 3, 8 ) = ixy >> 8;
+			goto loop;
+		
+		case 0x64: // LD HXY,HXY
+		case 0x6D: // LD LXY,LXY
+			goto loop;
+		
+		CASE5( 45, 4D, 55, 5D, 7D ): // LD r,LXY
+			R8( data >> 3, 8 ) = ixy;
+			goto loop;
+		
+		CASE7( 46, 4E, 56, 5E, 66, 6E, 7E ): // LD r,(IXY+disp)
+			pc++;
+			R8( data >> 3, 8 ) = READ( IXY_DISP( ixy, (int8_t) data2 ) );
+			goto loop;
+		
+		case 0x26: // LD HXY,imm
+			pc++;
+			goto ld_hxy_data;
+			
+		case 0x65: // LD HXY,LXY
+			data2 = (uint8_t) ixy;
+			goto ld_hxy_data;
+		
+		CASE5( 60, 61, 62, 63, 67 ): // LD HXY,r
+			data2 = R8( data, 0x60 );
+		ld_hxy_data:
+			ixy = (uint8_t) ixy | (data2 << 8);
+			goto set_ixy;
+		
+		case 0x2E: // LD LXY,imm
+			pc++;
+			goto ld_lxy_data;
+			
+		case 0x6C: // LD LXY,HXY
+			data2 = ixy >> 8;
+			goto ld_lxy_data;
+		
+		CASE5( 68, 69, 6A, 6B, 6F ): // LD LXY,r
+			data2 = R8( data, 0x68 );
+		ld_lxy_data:
+			ixy = (ixy & 0xFF00) | data2;
+		set_ixy:
+			if ( opcode == 0xDD )
+			{
+				ix = ixy;
+				goto loop;
+			}
+			iy = ixy;
+			goto loop;
+
+		case 0xF9: // LD SP,IXY
+			sp = ixy;
+			goto loop;
+	
+		case 0x22:{// LD (ADDR),IXY
+			uint16_t addr = GET_ADDR();
+			pc += 2;
+			WRITE_WORD( addr, ixy );
+			goto loop;
+		}
+		
+		case 0x21: // LD IXY,imm
+			ixy = GET_ADDR();
+			pc += 2;
+			goto set_ixy;
+		
+		case 0x2A:{// LD IXY,(addr)
+			uint16_t addr = GET_ADDR();
+			ixy = READ_WORD( addr );
+			pc += 2;
+			goto set_ixy;
+		}
+		
+	// DD/FD CB prefix
+		case 0xCB: {
+			data = IXY_DISP( ixy, (int8_t) data2 );
+			pc++;
+			data2 = READ_PROG( pc );
+			pc++;
+			switch ( data2 )
+			{
+			case 0x06: goto rlc_data_addr; // RLC (IXY)
+			case 0x16: goto rl_data_addr;  // RL (IXY)
+			case 0x26: goto sla_data_addr; // SLA (IXY)
+			case 0x36: goto sll_data_addr; // SLL (IXY)
+			case 0x0E: goto rrc_data_addr; // RRC (IXY)
+			case 0x1E: goto rr_data_addr;  // RR (IXY)
+			case 0x2E: goto sra_data_addr; // SRA (IXY)
+			case 0x3E: goto srl_data_addr; // SRL (IXY)
+			
+			CASE8( 46, 4E, 56, 5E, 66, 6E, 76, 7E ):{// BIT b,(IXY+disp)
+				uint8_t temp = READ( data );
+				int masked = temp & 1 << (data2 >> 3 & 7);
+				flags = (flags & C01) | H10 |
+						(masked & S80) |
+						((masked - 1) >> 8 & (Z40 | P04));
+				goto loop;
+			}
+			
+			CASE8( 86, 8E, 96, 9E, A6, AE, B6, BE ): // RES b,(IXY+disp)
+			CASE8( C6, CE, D6, DE, E6, EE, F6, FE ):{// SET b,(IXY+disp)
+				int temp = READ( data );
+				int bit = 1 << (data2 >> 3 & 7);
+				temp |= bit; // SET
+				if ( !(data2 & 0x40) )
+					temp ^= bit; // RES
+				WRITE( data, temp );
+				goto loop;
+			}
+			
+			default:
+				debug_printf( "Opcode $%02X $CB $%02X not supported\n", opcode, data2 );
+				warning = true;
+				goto loop;
+			}
+			assert( false );
+		}
+		
+	// INC/DEC
+		case 0x23: // INC IXY
+			ixy = uint16_t (ixy + 1);
+			goto set_ixy;
+		
+		case 0x2B: // DEC IXY
+			ixy = uint16_t (ixy - 1);
+			goto set_ixy;
+		
+		case 0x34: // INC (IXY+disp)
+			ixy = IXY_DISP( ixy, (int8_t) data2 );
+			pc++;
+			data = READ( ixy ) + 1;
+			WRITE( ixy, data );
+			goto inc_set_flags;
+		
+		case 0x35: // DEC (IXY+disp)
+			ixy = IXY_DISP( ixy, (int8_t) data2 );
+			pc++;
+			data = READ( ixy ) - 1;
+			WRITE( ixy, data );
+			goto dec_set_flags;
+		
+		case 0x24: // INC HXY
+			ixy = uint16_t (ixy + 0x100);
+			data = ixy >> 8;
+			goto inc_xy_common;
+		
+		case 0x2C: // INC LXY
+			data = uint8_t (ixy + 1);
+			ixy = (ixy & 0xFF00) | data;
+		inc_xy_common:
+			if ( opcode == 0xDD )
+			{
+				ix = ixy;
+				goto inc_set_flags;
+			}
+			iy = ixy;
+			goto inc_set_flags;
+		
+		case 0x25: // DEC HXY
+			ixy = uint16_t (ixy - 0x100);
+			data = ixy >> 8;
+			goto dec_xy_common;
+		
+		case 0x2D: // DEC LXY
+			data = uint8_t (ixy - 1);
+			ixy = (ixy & 0xFF00) | data;
+		dec_xy_common:
+			if ( opcode == 0xDD )
+			{
+				ix = ixy;
+				goto dec_set_flags;
+			}
+			iy = ixy;
+			goto dec_set_flags;
+		
+	// PUSH/POP
+		case 0xE5: // PUSH IXY
+			data = ixy;
+			goto push_data;
+		
+		case 0xE1:{// POP IXY
+			ixy = READ_WORD( sp );
+			sp = uint16_t (sp + 2);
+			goto set_ixy;
+		}
+	
+	// Misc
+		
+		case 0xE9: // JP (IXY)
+			pc = ixy;
+			goto loop;
+		
+		case 0xE3:{// EX (SP),IXY
+			uint16_t temp = READ_WORD( sp );
+			WRITE_WORD( sp, ixy );
+			ixy = temp;
+			goto set_ixy;
+		}
+		
+		default:
+			debug_printf( "Unnecessary DD/FD prefix encountered\n" );
+			warning = true;
+			pc--;
+			goto loop;
+		}
+		assert( false );
+	}
+	
+	}
+	debug_printf( "Unhandled main opcode: $%02X\n", opcode );
+	assert( false );
+	
+halt:
+	s_time &= 3; // increment by multiple of 4
+out_of_time:
+	pc--;
+	
+	s.time   = s_time;
+	rg.flags = flags;
+	r.ix     = ix;
+	r.iy     = iy;
+	r.sp     = sp;
+	r.pc     = pc;
+	this->r.b = rg;
+	this->state_ = s;
+	this->state = &this->state_;
+	
+	return warning;
+}
diff --git a/libraries/game-music-emu/gme/Ay_Cpu.h b/libraries/game-music-emu/gme/Ay_Cpu.h
new file mode 100644
index 000000000..6984b42dc
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ay_Cpu.h
@@ -0,0 +1,89 @@
+// Z80 CPU emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef AY_CPU_H
+#define AY_CPU_H
+
+#include "blargg_endian.h"
+
+typedef blargg_long cpu_time_t;
+
+// must be defined by caller
+void ay_cpu_out( class Ay_Cpu*, cpu_time_t, unsigned addr, int data );
+int ay_cpu_in( class Ay_Cpu*, unsigned addr );
+
+class Ay_Cpu {
+public:
+	// Clear all registers and keep pointer to 64K memory passed in
+	void reset( void* mem_64k );
+	
+	// Run until specified time is reached. Returns true if a suspicious/unsupported
+	// instruction was encountered at any point during the run.
+	bool run( cpu_time_t end_time );
+	
+	// Time of beginning of next instruction (kept as a base plus a relative offset)
+	cpu_time_t time() const             { return state->time + state->base; }
+	
+	// Alter current time. Not supported during run() call.
+	void set_time( cpu_time_t t )       { state->time = t - state->base; }
+	void adjust_time( int delta )       { state->time += delta; }
+	// 8-bit registers; member order differs by host endianness (see pairs_t below)
+	#if BLARGG_BIG_ENDIAN
+		struct regs_t { uint8_t b, c, d, e, h, l, flags, a; };
+	#else
+		struct regs_t { uint8_t c, b, e, d, l, h, a, flags; };
+	#endif
+	BOOST_STATIC_ASSERT( sizeof (regs_t) == 8 );
+	// 16-bit view of the same eight bytes; registers_t overlays it on regs_t via a union
+	struct pairs_t { uint16_t bc, de, hl, fa; };
+	
+	// Registers are not updated until run() returns
+	struct registers_t {
+		uint16_t pc;
+		uint16_t sp;
+		uint16_t ix;
+		uint16_t iy;
+		union {
+			regs_t b; //  b.b, b.c, b.d, b.e, b.h, b.l, b.flags, b.a
+			pairs_t w; // w.bc, w.de, w.hl, w.fa
+		};
+		union {
+			regs_t b;
+			pairs_t w;
+		} alt; // second register set with the same byte/pair layout
+		uint8_t iff1;
+		uint8_t iff2;
+		uint8_t r;
+		uint8_t i;
+		uint8_t im;
+	};
+	//registers_t r; (below for efficiency)
+	
+	// can read this far past end of memory
+	enum { cpu_padding = 0x100 };
+	
+public:
+	Ay_Cpu();
+private:
+	uint8_t szpc [0x200];
+	uint8_t* mem;
+	cpu_time_t end_time_;
+	struct state_t {
+		cpu_time_t base;
+		cpu_time_t time; // relative to base; time() returns base + time
+	};
+	state_t* state; // points to state_ or a local copy within run()
+	state_t state_;
+	void set_end_time( cpu_time_t t ); // moves base to t and shifts time so time() is unchanged
+public:
+	registers_t r;
+};
+
+inline void Ay_Cpu::set_end_time( cpu_time_t t )
+{
+	// Rebase to t: shift the relative time by the distance the base moves, so base + time stays put
+	state->time += state->base - t;
+	state->base = t;
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Ay_Emu.cpp b/libraries/game-music-emu/gme/Ay_Emu.cpp
new file mode 100644
index 000000000..a973ba0f1
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ay_Emu.cpp
@@ -0,0 +1,405 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Ay_Emu.h"
+
+#include "blargg_endian.h"
+#include <string.h>
+
+/* Copyright (C) 2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+long const spectrum_clock = 3546900;
+long const cpc_clock      = 2000000;
+
+unsigned const ram_start = 0x4000;
+int const osc_count = Ay_Apu::osc_count + 1;
+
+Ay_Emu::Ay_Emu()
+{
+	beeper_output = 0; // no beeper buffer until set_voice() assigns one
+	set_type( gme_ay_type );
+	
+	// One label per voice: the three AY channels followed by the beeper
+	static const char* const voice_names [osc_count] = { "Wave 1", "Wave 2", "Wave 3", "Beeper" };
+	set_voice_names( voice_names );
+	
+	// Voice type codes, in the same order as the labels above
+	static int const voice_types [osc_count] = {
+		wave_type | 0, wave_type | 1, wave_type | 2, mixed_type | 0
+	};
+	set_voice_types( voice_types );
+	set_silence_lookahead( 6 );
+}
+
+Ay_Emu::~Ay_Emu() { } // empty body: nothing is released explicitly here
+
+// Track info
+
+static byte const* get_data( Ay_Emu::file_t const& file, byte const* ptr, int min_size )
+{
+	long const pos       = ptr - (byte const*) file.header; // position of the offset field within the file
+	long const file_size = file.end - (byte const*) file.header;
+	assert( (unsigned long) pos <= (unsigned long) file_size - 2 );
+	long const target = pos + (int16_t) get_be16( ptr ); // field holds a signed big-endian offset relative to itself
+	if ( target == pos || target < 0 || target > file_size - min_size ) // signed compare: min_size > file_size must fail, not wrap
+		return 0; // zero offset, or fewer than min_size bytes available inside the file at the target
+	return (byte const*) file.header + target;
+}
+
+static blargg_err_t parse_header( byte const* in, long size, Ay_Emu::file_t* out )
+{
+	// Record the file bounds first; they are filled in even when validation below fails
+	out->header = (Ay_Emu::header_t const*) in;
+	out->end    = in + size;
+	
+	// Require a complete header that carries the "ZXAYEMUL" signature
+	if ( size < Ay_Emu::header_size || memcmp( out->header->tag, "ZXAYEMUL", 8 ) != 0 )
+		return gme_wrong_file_type;
+	
+	// Track table holds one 4-byte entry per track; max_track is the highest track index
+	int const table_size = (out->header->max_track + 1) * 4;
+	byte const* const table = get_data( *out, out->header->track_info, table_size );
+	out->tracks = table;
+	if ( table == 0 )
+		return "Missing track data";
+	
+	return 0;
+}
+
+static void copy_ay_fields( Ay_Emu::file_t const& file, track_info_t* out, int track )
+{
+	byte const* const entry = file.tracks + track * 4; // this track's 4-byte entry in the track table
+	Gme_File::copy_field_( out->song, (char const*) get_data( file, entry, 1 ) );
+	if ( byte const* const info = get_data( file, entry + 2, 6 ) )
+		out->length = get_be16( info + 4 ) * (1000L / 50); // length is stored in 1/50 s frames; convert to msec
+	
+	Gme_File::copy_field_( out->author,  (char const*) get_data( file, file.header->author, 1 ) );
+	Gme_File::copy_field_( out->comment, (char const*) get_data( file, file.header->comment, 1 ) );
+}
+
+blargg_err_t Ay_Emu::track_info_( track_info_t* out, int track ) const
+{
+	copy_ay_fields( file, out, track ); // fills song, length, author and comment from the loaded file
+	return 0; // unconditionally reports success
+}
+
+struct Ay_File : Gme_Info_
+{
+	Ay_Emu::file_t file; // header/end/tracks pointers filled in by parse_header()
+	
+	Ay_File() { set_type( gme_ay_type ); }
+	// Validate the header and publish the track count; no emulator state is set up here
+	blargg_err_t load_mem_( byte const* begin, long size )
+	{
+		RETURN_ERR( parse_header( begin, size, &file ) );
+		set_track_count( file.header->max_track + 1 ); // max_track is the highest index
+		return 0;
+	}
+	// Same field extraction as Ay_Emu::track_info_
+	blargg_err_t track_info_( track_info_t* out, int track ) const
+	{
+		copy_ay_fields( file, out, track );
+		return 0;
+	}
+};
+
+static Music_Emu* new_ay_emu () { return BLARGG_NEW Ay_Emu ; } // factory for the full emulator
+static Music_Emu* new_ay_file() { return BLARGG_NEW Ay_File; } // factory for the info-only reader
+// NOTE(review): field meanings of gme_type_t_ are declared elsewhere; confirm before editing the initializer
+static gme_type_t_ const gme_ay_type_ = { "ZX Spectrum", 0, &new_ay_emu, &new_ay_file, "AY", 1 };
+BLARGG_EXPORT extern gme_type_t const gme_ay_type = &gme_ay_type_;
+
+// Setup
+
+blargg_err_t Ay_Emu::load_mem_( byte const* in, long size )
+{
+	assert( offsetof (header_t,track_info [2]) == header_size ); // header_t must match the on-disk header size
+	
+	RETURN_ERR( parse_header( in, size, &file ) ); // rejects short files and files without the signature
+	set_track_count( file.header->max_track + 1 ); // max_track is the highest track index
+	
+	if ( file.header->vers > 2 )
+		set_warning( "Unknown file version" ); // warn only; loading continues
+	
+	set_voice_count( osc_count );
+	apu.volume( gain() );
+	
+	return setup_buffer( spectrum_clock ); // start at the Spectrum clock rate
+}
+	
+void Ay_Emu::update_eq( blip_eq_t const& eq )
+{
+	apu.treble_eq( eq ); // forward the equalizer setting to the AY sound chip emulator
+}
+
+void Ay_Emu::set_voice( int i, Blip_Buffer* center, Blip_Buffer*, Blip_Buffer* )
+{
+	if ( i < Ay_Apu::osc_count )
+		apu.osc_output( i, center ); // one of the AY chip's own channels
+	else
+		beeper_output = center; // any index past the AY channels is the beeper
+}
+
+// Emulation
+
+void Ay_Emu::set_tempo_( double t )
+{
+	play_period = blip_time_t (clock_rate() / 50 / t); // clocks per 1/50 s frame, shortened as tempo t grows
+}
+
+blargg_err_t Ay_Emu::start_track_( int track )
+{
+	RETURN_ERR( Classic_Emu::start_track_( track ) );
+	// Reset the memory image, load the track's data blocks, then install a small driver at address 0
+	memset( mem.ram + 0x0000, 0xC9, 0x100 ); // fill RST vectors with RET
+	memset( mem.ram + 0x0100, 0xFF, 0x4000 - 0x100 );
+	memset( mem.ram + ram_start, 0x00, sizeof mem.ram - ram_start );
+	memset( mem.padding1, 0xFF, sizeof mem.padding1 );
+	memset( mem.ram + 0x10000, 0xFF, sizeof mem.ram - 0x10000 );
+	
+	// locate data blocks
+	byte const* const data = get_data( file, file.tracks + track * 4 + 2, 14 );
+	if ( !data ) return "File data missing";
+	
+	byte const* const more_data = get_data( file, data + 10, 6 );
+	if ( !more_data ) return "File data missing";
+	
+	byte const* blocks = get_data( file, data + 12, 8 );
+	if ( !blocks ) return "File data missing";
+	
+	// initial addresses
+	cpu::reset( mem.ram );
+	r.sp = get_be16( more_data );
+	r.b.a = r.b.b = r.b.d = r.b.h = data [8];
+	r.b.flags = r.b.c = r.b.e = r.b.l = data [9];
+	r.alt.w = r.w;
+	r.ix = r.iy = r.w.hl;
+	
+	unsigned addr = get_be16( blocks );
+	if ( !addr ) return "File data missing";
+	
+	unsigned init = get_be16( more_data + 2 );
+	if ( !init )
+		init = addr; // no explicit init address: start at the first block
+	
+	// copy blocks into memory
+	do
+	{
+		blocks += 2;
+		unsigned len = get_be16( blocks ); blocks += 2;
+		if ( addr + len > 0x10000 )
+		{
+			set_warning( "Bad data block size" );
+			len = 0x10000 - addr;
+		}
+		check( len );
+		byte const* in = get_data( file, blocks, 0 ); blocks += 2;
+		if ( !in || len > blargg_ulong (file.end - in) ) // get_data() yields 0 for a zero or out-of-range offset
+		{
+			set_warning( "Missing file data" );
+			len = in ? file.end - in : 0; // unresolved pointer: nothing can be copied
+		}
+		//debug_printf( "addr: $%04X, len: $%04X\n", addr, len );
+		if ( addr < ram_start && addr >= 0x400 ) // several tracks use low data
+			debug_printf( "Block addr in ROM\n" );
+		if ( len ) memcpy( mem.ram + addr, in, len ); // never hand memcpy a null source
+		
+		if ( file.end - blocks < 8 )
+		{
+			set_warning( "Missing file data" );
+			break;
+		}
+	}
+	while ( (addr = get_be16( blocks )) != 0 );
+	
+	// copy and configure driver
+	static byte const passive [] = {
+		0xF3,       // DI
+		0xCD, 0, 0, // CALL init
+		0xED, 0x5E, // LOOP: IM 2
+		0xFB,       // EI
+		0x76,       // HALT
+		0x18, 0xFA  // JR LOOP
+	};
+	static byte const active [] = {
+		0xF3,       // DI
+		0xCD, 0, 0, // CALL init
+		0xED, 0x56, // LOOP: IM 1
+		0xFB,       // EI
+		0x76,       // HALT
+		0xCD, 0, 0, // CALL play
+		0x18, 0xF7  // JR LOOP
+	};
+	memcpy( mem.ram, passive, sizeof passive );
+	unsigned play_addr = get_be16( more_data + 4 );
+	//debug_printf( "Play: $%04X\n", play_addr );
+	if ( play_addr )
+	{
+		memcpy( mem.ram, active, sizeof active );
+		mem.ram [ 9] = play_addr; // patch the CALL play operand, low byte first
+		mem.ram [10] = play_addr >> 8;
+	}
+	mem.ram [2] = init; // patch the CALL init operand, low byte first
+	mem.ram [3] = init >> 8;
+	
+	mem.ram [0x38] = 0xFB; // Put EI at interrupt vector (followed by RET)
+	
+	memcpy( mem.ram + 0x10000, mem.ram, 0x80 ); // some code wraps around (ugh)
+	
+	beeper_delta = int (apu.amp_range * 0.65);
+	last_beeper = 0;
+	apu.reset();
+	next_play = play_period;
+	
+	// start at spectrum speed
+	change_clock_rate( spectrum_clock );
+	set_tempo( tempo() );
+	
+	spectrum_mode = false;
+	cpc_mode      = false;
+	cpc_latch     = 0;
+	
+	return 0;
+}
+
+// Emulation
+
+void Ay_Emu::cpu_out_misc( cpu_time_t time, unsigned addr, int data )
+{
+	if ( !cpc_mode ) // Spectrum-style AY ports are ignored once CPC mode is locked in
+	{
+		switch ( addr & 0xFEFF )
+		{
+		case 0xFEFD: // select AY register (low 4 bits of data)
+			spectrum_mode = true;
+			apu_addr = data & 0x0F;
+			return;
+		
+		case 0xBEFD: // write data to the previously selected AY register
+			spectrum_mode = true;
+			apu.write( time, apu_addr, data );
+			return;
+		}
+	}
+	
+	if ( !spectrum_mode ) // CPC-style ports are ignored once Spectrum mode is locked in
+	{
+		switch ( addr >> 8 )
+		{
+		case 0xF6: // control port: top two data bits choose the action on the latched value
+			switch ( data & 0xC0 )
+			{
+			case 0xC0: // latched value selects the AY register
+				apu_addr = cpc_latch & 0x0F;
+				goto enable_cpc;
+			
+			case 0x80: // latched value is written to the selected AY register
+				apu.write( time, apu_addr, cpc_latch );
+				goto enable_cpc;
+			}
+			break;
+		
+		case 0xF4: // data port: remember the value for a later 0xF6 command
+			cpc_latch = data;
+			goto enable_cpc;
+		}
+	}
+	
+	debug_printf( "Unmapped OUT: $%04X <- $%02X\n", addr, data );
+	return;
+	
+enable_cpc: // first CPC access switches the clock rate and recomputes the play period
+	if ( !cpc_mode )
+	{
+		cpc_mode = true;
+		change_clock_rate( cpc_clock );
+		set_tempo( tempo() );
+	}
+}
+
+void ay_cpu_out( Ay_Cpu* cpu, cpu_time_t time, unsigned addr, int data )
+{
+	Ay_Emu& emu = STATIC_CAST(Ay_Emu&,*cpu);
+	
+	// Everything except the beeper port (low byte 0xFE, outside CPC mode) goes to the general handler
+	if ( emu.cpc_mode || (addr & 0xFF) != 0xFE )
+	{
+		emu.cpu_out_misc( time, addr, data );
+		return;
+	}
+	
+	// Only bit 4 drives the beeper; do nothing unless it actually toggles
+	int const level = data & 0x10;
+	if ( level == emu.last_beeper )
+		return;
+	int const step = emu.beeper_delta;
+	emu.last_beeper   = level;
+	emu.beeper_delta  = -step; // the next edge goes in the opposite direction
+	emu.spectrum_mode = true;
+	if ( emu.beeper_output )
+		emu.apu.synth_.offset( time, step, emu.beeper_output );
+}
+
+int ay_cpu_in( Ay_Cpu*, unsigned addr )
+{
+	// Port 0xFE (keyboard read and other things) is expected; any other port is logged
+	if ( (addr & 0xFF) != 0xFE )
+		debug_printf( "Unmapped IN : $%04X\n", addr );
+	
+	// Every port reads back as 0xFF; other values break some beeper tunes
+	return 0xFF;
+}
+
+blargg_err_t Ay_Emu::run_clocks( blip_time_t& duration, int )
+{
+	set_time( 0 );
+	if ( !(spectrum_mode | cpc_mode) )
+		duration /= 2; // until mode is set, leave room for halved clock rate
+	
+	while ( time() < duration )
+	{
+		cpu::run( min( duration, (blip_time_t) next_play ) ); // stop at the next play interrupt or frame end
+		
+		if ( time() >= next_play )
+		{
+			next_play += play_period;
+			
+			if ( r.iff1 ) // deliver the interrupt only while interrupts are enabled
+			{
+				if ( mem.ram [r.pc] == 0x76 ) // step past a HALT so execution resumes after it
+					r.pc++;
+				
+				r.iff1 = r.iff2 = 0;
+				// push the return address, high byte first
+				mem.ram [--r.sp] = uint8_t (r.pc >> 8);
+				mem.ram [--r.sp] = uint8_t (r.pc);
+				r.pc = 0x38; // default handler address; replaced below when im == 2
+				cpu::adjust_time( 12 );
+				if ( r.im == 2 )
+				{
+					cpu::adjust_time( 6 );
+					unsigned addr = r.i * 0x100u + 0xFF; // vector table entry at I:0xFF
+					r.pc = mem.ram [(addr + 1) & 0xFFFF] * 0x100u + mem.ram [addr];
+				}
+			}
+		}
+	}
+	duration = time(); // report how far emulation actually ran
+	next_play -= duration; // make next_play relative to the start of the next call
+	check( next_play >= 0 );
+	adjust_time( -duration );
+	
+	apu.end_frame( duration );
+	
+	return 0;
+}
diff --git a/libraries/game-music-emu/gme/Ay_Emu.h b/libraries/game-music-emu/gme/Ay_Emu.h
new file mode 100644
index 000000000..6726f0157
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ay_Emu.h
@@ -0,0 +1,69 @@
+// Sinclair Spectrum AY music file emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef AY_EMU_H
+#define AY_EMU_H
+
+#include "Classic_Emu.h"
+#include "Ay_Apu.h"
+#include "Ay_Cpu.h"
+
+class Ay_Emu : private Ay_Cpu, public Classic_Emu {
+	typedef Ay_Cpu cpu;
+public:
+	// AY file header
+	enum { header_size = 0x14 };
+	struct header_t
+	{
+		byte tag [8]; // file signature, checked against "ZXAYEMUL"
+		byte vers;
+		byte player;
+		byte unused [2];
+		byte author [2]; // 16-bit big-endian offset, relative to this field
+		byte comment [2]; // 16-bit big-endian offset, relative to this field
+		byte max_track; // highest track index (track count - 1)
+		byte first_track;
+		byte track_info [2]; // 16-bit big-endian offset to the track table
+	};
+	
+	static gme_type_t static_type() { return gme_ay_type; }
+public:
+	Ay_Emu();
+	~Ay_Emu();
+	struct file_t {
+		header_t const* header; // start of the file data
+		byte const* end; // one past the last byte of the file data
+		byte const* tracks; // track table (4 bytes per track)
+	};
+protected:
+	blargg_err_t track_info_( track_info_t*, int track ) const;
+	blargg_err_t load_mem_( byte const*, long );
+	blargg_err_t start_track_( int );
+	blargg_err_t run_clocks( blip_time_t&, int );
+	void set_tempo_( double );
+	void set_voice( int, Blip_Buffer*, Blip_Buffer*, Blip_Buffer* );
+	void update_eq( blip_eq_t const& );
+private:
+	file_t file;
+	
+	cpu_time_t play_period; // clocks between play interrupts
+	cpu_time_t next_play; // time of the next play interrupt
+	Blip_Buffer* beeper_output; // null when no buffer is assigned to the beeper
+	int beeper_delta; // amplitude step applied on the next beeper toggle
+	int last_beeper; // last value of the beeper bit (data & 0x10)
+	int apu_addr; // currently selected AY register
+	int cpc_latch; // last value written to the CPC data port
+	bool spectrum_mode; // set once Spectrum-style port use is seen
+	bool cpc_mode; // set once CPC-style port use is seen
+	
+	// large items
+	struct {
+		byte padding1 [0x100];
+		byte ram [0x10000 + 0x100]; // 64K plus padding past the end
+	} mem;
+	Ay_Apu apu;
+	friend void ay_cpu_out( Ay_Cpu*, cpu_time_t, unsigned addr, int data );
+	void cpu_out_misc( cpu_time_t, unsigned addr, int data );
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Blip_Buffer.cpp b/libraries/game-music-emu/gme/Blip_Buffer.cpp
new file mode 100644
index 000000000..2b88cd4f8
--- /dev/null
+++ b/libraries/game-music-emu/gme/Blip_Buffer.cpp
@@ -0,0 +1,460 @@
+// Blip_Buffer 0.4.1. http://www.slack.net/~ant/
+
+#include "Blip_Buffer.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#ifdef BLARGG_ENABLE_OPTIMIZER
+	#include BLARGG_ENABLE_OPTIMIZER
+#endif
+
+int const silent_buf_size = 1; // size used for Silent_Blip_Buffer
+
+// Puts the buffer into its initial state: no sample storage, no sample rate and
+// no clock rate. Storage is only allocated later by set_sample_rate().
+Blip_Buffer::Blip_Buffer()
+{
+	factor_       = (blip_ulong)-1 / 2;
+	offset_       = 0;
+	buffer_       = 0;
+	buffer_size_  = 0;
+	sample_rate_  = 0;
+	reader_accum_ = 0;
+	bass_shift_   = 0;
+	clock_rate_   = 0;
+	bass_freq_    = 16;
+	length_       = 0;
+	modified_     = 0; // clear_modified() may be called before the first clear()
+	
+	// assumptions code makes about implementation-defined features
+	#ifndef NDEBUG
+		// right shift of negative value preserves sign
+		buf_t_ i = -0x7FFFFFFE;
+		assert( (i >> 1) == -0x3FFFFFFF );
+		
+		// casting to short truncates to 16 bits and sign-extends
+		i = 0x18000;
+		assert( (short) i == -0x8000 );
+	#endif
+}
+
+// Releases the sample storage, unless this is a Silent_Blip_Buffer whose
+// buffer_ refers to its own member array rather than heap memory.
+Blip_Buffer::~Blip_Buffer()
+{
+	if ( buffer_size_ == silent_buf_size )
+		return;
+	
+	free( buffer_ );
+}
+
+// Sets up a buffer backed by the object's own small array. buffer_size_ is set
+// to silent_buf_size, the value other code checks to recognise a silent buffer.
+Silent_Blip_Buffer::Silent_Blip_Buffer()
+{
+	// in case machine takes exception for signed overflow
+	memset( buf, 0, sizeof buf );
+	
+	buffer_      = buf;
+	buffer_size_ = silent_buf_size;
+	factor_      = 0;
+}
+
+// Removes all available samples and resets the read position, filter accumulator
+// and modified flag. If 'entire_buffer' is non-zero the whole sample buffer is
+// zeroed; otherwise only the samples that were waiting to be read are zeroed. In
+// both cases the blip_buffer_extra_ entries that follow are zeroed too.
+void Blip_Buffer::clear( int entire_buffer )
+{
+	// Measure before offset_ is reset: samples_avail() is derived from offset_, so
+	// reading it afterwards would always give 0 and leave stale samples behind.
+	long count = buffer_size_;
+	if ( !entire_buffer )
+	{
+		count = samples_avail();
+		if ( count > buffer_size_ )
+			count = buffer_size_; // never clear past the allocated storage
+	}
+	
+	offset_      = 0;
+	reader_accum_ = 0;
+	modified_    = 0;
+	if ( buffer_ )
+		memset( buffer_, 0, (count + blip_buffer_extra_) * sizeof (buf_t_) );
+}
+
+// Sets the output sample rate and the buffer length in milliseconds, then clears
+// the buffer. Passing blip_max_length as 'msec' requests the longest buffer that
+// resampled time can represent. Returns 0 on success, otherwise an error string;
+// on failure the current buffer setup is left unchanged.
+Blip_Buffer::blargg_err_t Blip_Buffer::set_sample_rate( long new_rate, int msec )
+{
+	if ( buffer_size_ == silent_buf_size )
+	{
+		assert( 0 );
+		return "Internal (tried to resize Silent_Blip_Buffer)";
+	}
+	
+	// a zero rate would divide by zero when length_ is computed below, and a
+	// negative one would produce a negative buffer size
+	if ( new_rate <= 0 )
+	{
+		assert( 0 );
+		return "Invalid sample rate";
+	}
+	
+	// start with maximum length that resampled time can represent
+	long new_size = (UINT_MAX >> BLIP_BUFFER_ACCURACY) - blip_buffer_extra_ - 64;
+	if ( msec != blip_max_length )
+	{
+		// samples needed for (msec + 1) milliseconds, rounded up
+		long s = (new_rate * (msec + 1) + 999) / 1000;
+		if ( s < new_size )
+			new_size = s;
+		else
+			assert( 0 ); // fails if requested buffer length exceeds limit
+	}
+	
+	if ( buffer_size_ != new_size )
+	{
+		// keep the old pointer until realloc is known to have succeeded
+		void* p = realloc( buffer_, (new_size + blip_buffer_extra_) * sizeof *buffer_ );
+		if ( !p )
+			return "Out of memory";
+		buffer_ = (buf_t_*) p;
+	}
+	
+	buffer_size_ = new_size;
+	assert( buffer_size_ != silent_buf_size );
+	
+	// update things based on the sample rate
+	sample_rate_ = new_rate;
+	length_ = new_size * 1000 / new_rate - 1;
+	if ( msec )
+		assert( length_ == msec ); // ensure length is same as that passed in
+	if ( clock_rate_ )
+		clock_rate( clock_rate_ ); // recompute factor_ for the new sample rate
+	bass_freq( bass_freq_ );       // recompute bass_shift_ for the new sample rate
+	
+	clear();
+	
+	return 0; // success
+}
+
+// Returns the fixed-point (BLIP_BUFFER_ACCURACY fractional bits) number of
+// output samples per source clock for the given clock rate, rounded to nearest.
+blip_resampled_time_t Blip_Buffer::clock_rate_factor( long rate ) const
+{
+	long const fixed_one = 1L << BLIP_BUFFER_ACCURACY;
+	double ratio = (double) sample_rate_ / rate;
+	blip_long const rounded = (blip_long) floor( ratio * fixed_one + 0.5 );
+	
+	// fails if clock/output ratio is too large
+	assert( rounded > 0 || !sample_rate_ );
+	return (blip_resampled_time_t) rounded;
+}
+
+// Sets the high-pass filter frequency and derives bass_shift_ from it. The
+// frequency is remembered in bass_freq_, and set_sample_rate() calls this again
+// with that value once a sample rate is known.
+void Blip_Buffer::bass_freq( int freq )
+{
+	bass_freq_ = freq;
+	int shift = 31;
+	// sample_rate_ is 0 until set_sample_rate() has been called; skip the
+	// computation then rather than dividing by zero
+	if ( freq > 0 && sample_rate_ > 0 )
+	{
+		shift = 13;
+		long f = (freq << 16) / sample_rate_;
+		// lower the shift once per remaining bit of f, stopping if it reaches 0
+		while ( (f >>= 1) && --shift ) { }
+	}
+	bass_shift_ = shift;
+}
+
+// Ends the current time frame of 't' clocks by advancing the resampled offset,
+// which makes the frame's samples available for reading.
+void Blip_Buffer::end_frame( blip_time_t t )
+{
+	blip_resampled_time_t const advance = t * factor_;
+	offset_ += advance;
+	
+	// time outside buffer length
+	assert( samples_avail() <= (long) buffer_size_ );
+}
+
+// Moves the resampled offset back by 'count' whole samples without touching
+// the sample storage itself.
+void Blip_Buffer::remove_silence( long count )
+{
+	// tried to remove more samples than available
+	assert( count <= samples_avail() );
+	
+	blip_resampled_time_t const removed = (blip_resampled_time_t) count << BLIP_BUFFER_ACCURACY;
+	offset_ -= removed;
+}
+
+// Returns how many whole output samples lie between the current offset and the
+// resampled position of clock time 't'.
+long Blip_Buffer::count_samples( blip_time_t t ) const
+{
+	unsigned long const begin_index = offset_ >> BLIP_BUFFER_ACCURACY;
+	unsigned long const end_index   = resampled_time( t ) >> BLIP_BUFFER_ACCURACY;
+	return (long) (end_index - begin_index);
+}
+
+// Returns the number of clocks needed until 'count' samples are available.
+// 'count' is capped at the buffer size. Returns 0 if factor_ is zero.
+blip_time_t Blip_Buffer::count_clocks( long count ) const
+{
+	if ( factor_ == 0 )
+	{
+		assert( 0 ); // sample rate and clock rates must be set first
+		return 0;
+	}
+	
+	long const capped = (count > buffer_size_) ? (long) buffer_size_ : count;
+	blip_resampled_time_t const target  = (blip_resampled_time_t) capped << BLIP_BUFFER_ACCURACY;
+	blip_resampled_time_t const pending = target - offset_;
+	
+	// divide by factor_, rounding up
+	return (blip_time_t) ((pending + factor_ - 1) / factor_);
+}
+
+// Removes 'count' samples from the front of the buffer: the remaining samples
+// (and the extra tail) are shifted to the start and the vacated end is zeroed.
+void Blip_Buffer::remove_samples( long count )
+{
+	if ( !count )
+		return;
+	
+	remove_silence( count );
+	
+	// entries still in use once the offset has been moved back
+	long const kept = samples_avail() + blip_buffer_extra_;
+	memmove( buffer_, buffer_ + count, kept * sizeof *buffer_ );
+	memset( buffer_ + kept, 0, count * sizeof *buffer_ );
+}
+
+// Blip_Synth_
+
+// Starts with no output buffer, zero last amplitude and zero volume.
+Blip_Synth_Fast_::Blip_Synth_Fast_() :
+	buf( 0 ),
+	last_amp( 0 ),
+	delta_factor( 0 )
+{
+}
+
+// Converts the volume unit to the integer factor applied to each delta,
+// scaled by 2^blip_sample_bits and rounded.
+void Blip_Synth_Fast_::volume_unit( double new_unit )
+{
+	long const full_scale = 1L << blip_sample_bits;
+	delta_factor = int (new_unit * full_scale + 0.5);
+}
+
+#if !BLIP_BUFFER_FAST
+
+// Binds the synth to the caller-provided impulse array 'p' of kernel width 'w'.
+// Volume and kernel unit start at zero and there is no output buffer yet.
+Blip_Synth_::Blip_Synth_( short* p, int w ) :
+	buf( 0 ),
+	last_amp( 0 ),
+	delta_factor( 0 ),
+	volume_unit_( 0.0 ),
+	impulses( p ),
+	width( w ),
+	kernel_unit( 0 )
+{
+}
+
+#undef PI
+#define PI 3.1415926535897932384626433832795029
+
+// Fills out [0..count-1] with one half of a low-pass kernel.
+//   oversample - scales the angle step between successive output points
+//   treble     - rolloff amount; clamped to the range -300.0 .. 5.0
+//   cutoff     - where the flat region ends and rolloff begins; clamped to 0.999
+static void gen_sinc( float* out, int count, double oversample, double treble, double cutoff )
+{
+	// keeps (1.0 - cutoff) in the rolloff exponent below away from zero
+	if ( cutoff >= 0.999 )
+		cutoff = 0.999;
+	
+	if ( treble < -300.0 )
+		treble = -300.0;
+	if ( treble > 5.0 )
+		treble = 5.0;
+	
+	double const maxh = 4096.0;
+	double const rolloff = pow( 10.0, 1.0 / (maxh * 20.0) * treble / (1.0 - cutoff) );
+	double const pow_a_n = pow( rolloff, maxh - maxh * cutoff );
+	double const to_angle = PI / 2 / maxh / oversample;
+	for ( int i = 0; i < count; i++ )
+	{
+		// (i - count) * 2 + 1 is always odd and negative, so angle is never exactly 0
+		double angle          = ((i - count) * 2 + 1) * to_angle;
+		double angle_maxh     = angle * maxh;
+		double angle_maxh_mid = angle_maxh * cutoff;
+		
+		double y = maxh;
+		
+		// 0 to Fs/2*cutoff, flat
+		if ( angle_maxh_mid ) // unstable at t=0
+			y *= sin( angle_maxh_mid ) / angle_maxh_mid;
+		
+		// Fs/2*cutoff to Fs/2, logarithmic rolloff
+		double cosa = cos( angle );
+		double den = 1 + rolloff * (rolloff - cosa - cosa);
+		
+		// Becomes unstable when rolloff is near 1.0 and t is near 0,
+		// which is the only time den becomes small
+		if ( den > 1e-13 )
+		{
+			double num =
+				(cos( angle_maxh     - angle ) * rolloff - cos( angle_maxh     )) * pow_a_n -
+				 cos( angle_maxh_mid - angle ) * rolloff + cos( angle_maxh_mid );
+			
+			y = y * cutoff + num / den;
+		}
+		
+		out [i] = (float) y;
+	}
+}
+
+// Generates 'count' points of the equalized half-kernel into 'out' using this
+// object's treble, rolloff_freq, sample_rate and cutoff_freq, then windows it.
+void blip_eq_t::generate( float* out, int count ) const
+{
+	// lower cutoff freq for narrow kernels with their wider transition band
+	// (8 points->1.49, 16 points->1.15)
+	double oversample = blip_res * 2.25 / count + 0.85;
+	double half_rate = sample_rate * 0.5;
+	// an explicit cutoff frequency overrides the width-based default above
+	if ( cutoff_freq )
+		oversample = half_rate / cutoff_freq;
+	double cutoff = rolloff_freq * oversample / half_rate;
+	
+	gen_sinc( out, count, blip_res * oversample, treble, cutoff );
+	
+	// apply (half of) hamming window
+	double to_fraction = PI / (count - 1);
+	for ( int i = count; i--; )
+		out [i] *= 0.54f - 0.46f * (float) cos( i * to_fraction );
+}
+
+// Corrects the rounding error of the integer impulses. For each phase p (paired
+// with its mirror phase p2) the impulse values are subtracted from kernel_unit
+// and the remainder is added to one impulse entry of that phase.
+void Blip_Synth_::adjust_impulse()
+{
+	// sum pairs for each phase and add error correction to end of first half
+	int const size = impulses_size();
+	// the post-decrement makes the body run with p = blip_res - 1 down to blip_res / 2 - 1
+	for ( int p = blip_res; p-- >= blip_res / 2; )
+	{
+		int p2 = blip_res - 2 - p;
+		long error = kernel_unit;
+		for ( int i = 1; i < size; i += blip_res )
+		{
+			error -= impulses [i + p ];
+			error -= impulses [i + p2];
+		}
+		if ( p == p2 )
+			error /= 2; // phase = 0.5 impulse uses same half for both sides
+		impulses [size - blip_res + p] += (short) error;
+		//printf( "error: %ld\n", error );
+	}
+	
+	//for ( int i = blip_res; i--; printf( "\n" ) )
+	//  for ( int j = 0; j < width / 2; j++ )
+	//      printf( "%5ld,", impulses [j * blip_res + i + 1] );
+}
+
+// Rebuilds the integer impulse table from the given equalization parameters,
+// resets kernel_unit, and re-applies the current volume if one has been set.
+void Blip_Synth_::treble_eq( blip_eq_t const& eq )
+{
+	// sized for the widest supported impulse, plus blip_res entries of leading
+	// zeros and blip_res entries of mirrored samples past the center
+	float fimpulse [blip_res / 2 * (blip_widest_impulse_ - 1) + blip_res * 2];
+	
+	int const half_size = blip_res / 2 * (width - 1);
+	eq.generate( &fimpulse [blip_res], half_size );
+	
+	int i;
+	
+	// need mirror slightly past center for calculation
+	for ( i = blip_res; i--; )
+		fimpulse [blip_res + half_size + i] = fimpulse [blip_res + half_size - 1 - i];
+	
+	// starts at 0
+	for ( i = 0; i < blip_res; i++ )
+		fimpulse [i] = 0.0f;
+	
+	// find rescale factor
+	double total = 0.0;
+	for ( i = 0; i < half_size; i++ )
+		total += fimpulse [blip_res + i];
+	
+	//double const base_unit = 44800.0 - 128 * 18; // allows treble up to +0 dB
+	//double const base_unit = 37888.0; // allows treble to +5 dB
+	double const base_unit = 32768.0; // necessary for blip_unscaled to work
+	double rescale = base_unit / 2 / total;
+	kernel_unit = (long) base_unit;
+	
+	// integrate, first difference, rescale, convert to int
+	// ('sum' and 'next' are running sums of fimpulse taken blip_res entries apart)
+	double sum = 0.0;
+	double next = 0.0;
+	int const impulses_size = this->impulses_size();
+	for ( i = 0; i < impulses_size; i++ )
+	{
+		impulses [i] = (short) floor( (next - sum) * rescale + 0.5 );
+		sum += fimpulse [i];
+		next += fimpulse [i + blip_res];
+	}
+	adjust_impulse();
+	
+	// volume might require rescaling
+	double vol = volume_unit_;
+	if ( vol )
+	{
+		// zero the stored value first so volume_unit() does not skip the update as unchanged
+		volume_unit_ = 0.0;
+		volume_unit( vol );
+	}
+}
+
+// Sets the volume unit and derives delta_factor from it. Does nothing if the
+// unit is unchanged. For very small units the impulse kernel itself is scaled
+// down so that the resulting factor stays at 2.0 or above.
+void Blip_Synth_::volume_unit( double new_unit )
+{
+	if ( new_unit != volume_unit_ )
+	{
+		// use default eq if it hasn't been set yet
+		if ( !kernel_unit )
+			treble_eq( -8.0 );
+		
+		volume_unit_ = new_unit;
+		double factor = new_unit * (1L << blip_sample_bits) / kernel_unit;
+		
+		if ( factor > 0.0 )
+		{
+			int shift = 0;
+			
+			// if unit is really small, might need to attenuate kernel
+			while ( factor < 2.0 )
+			{
+				shift++;
+				factor *= 2.0;
+			}
+			
+			if ( shift )
+			{
+				kernel_unit >>= shift;
+				assert( kernel_unit > 0 ); // fails if volume unit is too low
+				
+				// keep values positive to avoid round-towards-zero of sign-preserving
+				// right shift for negative values
+				// (offset also adds half of the divisor, so the shift rounds to nearest)
+				long offset = 0x8000 + (1 << (shift - 1));
+				long offset2 = 0x8000 >> shift;
+				for ( int i = impulses_size(); i--; )
+					impulses [i] = (short) (((impulses [i] + offset) >> shift) - offset2);
+				adjust_impulse();
+			}
+		}
+		delta_factor = (int) floor( factor + 0.5 );
+		//printf( "delta_factor: %d, kernel_unit: %d\n", delta_factor, kernel_unit );
+	}
+}
+#endif
+
+// Reads at most 'max_samples' samples into 'out', removes them from the buffer
+// and returns how many were read. When 'stereo' is non-zero every other element
+// of 'out' is written, leaving room for an interleaved second channel.
+long Blip_Buffer::read_samples( blip_sample_t* BLIP_RESTRICT out, long max_samples, int stereo )
+{
+	long count = samples_avail();
+	if ( count > max_samples )
+		count = max_samples;
+	
+	if ( !count )
+		return count;
+	
+	int const stride = stereo ? 2 : 1;
+	int const bass = BLIP_READER_BASS( *this );
+	BLIP_READER_BEGIN( reader, *this );
+	
+	for ( blip_long remaining = count; remaining; --remaining )
+	{
+		blip_long sample = BLIP_READER_READ( reader );
+		// clamp to 16 bits: (sample >> 24) is 0 for positive overflow and -1 for
+		// negative, giving 0x7FFF or 0x8000 (which the cast turns into -0x8000)
+		if ( (blip_sample_t) sample != sample )
+			sample = 0x7FFF - (sample >> 24);
+		*out = (blip_sample_t) sample;
+		out += stride;
+		BLIP_READER_NEXT( reader, bass );
+	}
+	
+	BLIP_READER_END( reader, *this );
+	
+	remove_samples( count );
+	return count;
+}
+
+// Mixes 'count' 16-bit samples from 'in' into the buffer, starting at the
+// current write position. The buffer holds differences, so each sample is
+// added as the change from the previous one. Not usable on a silent buffer.
+void Blip_Buffer::mix_samples( blip_sample_t const* in, long count )
+{
+	if ( buffer_size_ == silent_buf_size )
+	{
+		assert( 0 );
+		return;
+	}
+	
+	int const sample_shift = blip_sample_bits - 16;
+	buf_t_* dest = buffer_ + (offset_ >> BLIP_BUFFER_ACCURACY) + blip_widest_impulse_ / 2;
+	
+	int last = 0;
+	for ( ; count--; ++dest )
+	{
+		blip_long const scaled = (blip_long) *in++ << sample_shift;
+		*dest += scaled - last;
+		last = scaled;
+	}
+	
+	// return to zero after the final sample
+	*dest -= last;
+}
+
diff --git a/libraries/game-music-emu/gme/Blip_Buffer.h b/libraries/game-music-emu/gme/Blip_Buffer.h
new file mode 100644
index 000000000..e6facc820
--- /dev/null
+++ b/libraries/game-music-emu/gme/Blip_Buffer.h
@@ -0,0 +1,490 @@
+// Band-limited sound synthesis buffer
+
+// Blip_Buffer 0.4.1
+#ifndef BLIP_BUFFER_H
+#define BLIP_BUFFER_H
+
+	// internal
+	#include <limits.h>
+	#if INT_MAX < 0x7FFFFFFF
+		#error "int must be at least 32 bits"
+	#endif
+	
+	typedef int blip_long;
+	typedef unsigned blip_ulong;
+
+// Time unit at source clock rate
+typedef blip_long blip_time_t;
+
+// Output samples are 16-bit signed, with a range of -32768 to 32767
+typedef short blip_sample_t;
+enum { blip_sample_max = 32767 };
+
+// Buffer that accumulates band-limited amplitude changes at a source clock rate
+// and lets them be read out as 16-bit samples at the output sample rate. The
+// sample storage is heap-allocated by set_sample_rate() and freed by the destructor.
+class Blip_Buffer {
+public:
+	typedef const char* blargg_err_t;
+	
+	// Set output sample rate and buffer length in milliseconds (1/1000 sec, defaults
+	// to 1/4 second), then clear buffer. Returns NULL on success, otherwise if there
+	// isn't enough memory, returns error without affecting current buffer setup.
+	blargg_err_t set_sample_rate( long samples_per_sec, int msec_length = 1000 / 4 );
+	
+	// Set number of source time units per second
+	void clock_rate( long );
+	
+	// End current time frame of specified duration and make its samples available
+	// (along with any still-unread samples) for reading with read_samples(). Begins
+	// a new time frame at the end of the current frame.
+	void end_frame( blip_time_t time );
+	
+	// Read at most 'max_samples' out of buffer into 'dest', removing them from
+	// the buffer. Returns number of samples actually read and removed. If stereo is
+	// true, increments 'dest' one extra time after writing each sample, to allow
+	// easy interleaving of two channels into a stereo output buffer.
+	long read_samples( blip_sample_t* dest, long max_samples, int stereo = 0 );
+	
+// Additional optional features
+
+	// Current output sample rate
+	long sample_rate() const;
+	
+	// Length of buffer, in milliseconds
+	int length() const;
+	
+	// Number of source time units per second
+	long clock_rate() const;
+	
+	// Set frequency high-pass filter frequency, where higher values reduce bass more
+	void bass_freq( int frequency );
+	
+	// Number of samples delay from synthesis to samples read out
+	int output_latency() const;
+	
+	// Remove all available samples and clear buffer to silence. If 'entire_buffer' is
+	// false, just clears out any samples waiting rather than the entire buffer.
+	void clear( int entire_buffer = 1 );
+	
+	// Number of samples available for reading with read_samples()
+	long samples_avail() const;
+	
+	// Remove 'count' samples from those waiting to be read
+	void remove_samples( long count );
+	
+// Experimental features
+	
+	// Count number of clocks needed until 'count' samples will be available.
+	// If buffer can't even hold 'count' samples, returns number of clocks until
+	// buffer becomes full.
+	blip_time_t count_clocks( long count ) const;
+	
+	// Number of raw samples that can be mixed within frame of specified duration.
+	long count_samples( blip_time_t duration ) const;
+	
+	// Mix 'count' samples from 'buf' into buffer.
+	void mix_samples( blip_sample_t const* buf, long count );
+	
+	// not documented yet
+	void set_modified() { modified_ = 1; }
+	int clear_modified() { int b = modified_; modified_ = 0; return b; }
+	typedef blip_ulong blip_resampled_time_t;
+	void remove_silence( long count );
+	blip_resampled_time_t resampled_duration( int t ) const     { return t * factor_; }
+	blip_resampled_time_t resampled_time( blip_time_t t ) const { return t * factor_ + offset_; }
+	blip_resampled_time_t clock_rate_factor( long clock_rate ) const;
+public:
+	Blip_Buffer();
+	~Blip_Buffer();
+	
+	// Move constructor: copies the settings and takes over the source's sample
+	// storage. The source is left without storage, because the destructor frees
+	// buffer_ and two objects sharing one pointer would free it twice.
+	// Silent_Blip_Buffer points buffer_ at its own member array, so moving one
+	// of those is still not meaningful.
+	Blip_Buffer( Blip_Buffer&& other ) :
+		factor_( other.factor_ ),
+		offset_( other.offset_ ),
+		buffer_( other.buffer_ ),
+		buffer_size_( other.buffer_size_ ),
+		reader_accum_( other.reader_accum_ ),
+		bass_shift_( other.bass_shift_ ),
+		sample_rate_( other.sample_rate_ ),
+		clock_rate_( other.clock_rate_ ),
+		bass_freq_( other.bass_freq_ ),
+		length_( other.length_ ),
+		modified_( other.modified_ )
+	{
+		other.buffer_       = 0;
+		other.buffer_size_  = 0;
+		other.offset_       = 0;
+		other.reader_accum_ = 0;
+	}
+
+	// Deprecated
+	typedef blip_resampled_time_t resampled_time_t;
+	blargg_err_t sample_rate( long r ) { return set_sample_rate( r ); }
+	blargg_err_t sample_rate( long r, int msec ) { return set_sample_rate( r, msec ); }
+private:
+	// noncopyable
+	Blip_Buffer( const Blip_Buffer& );
+	Blip_Buffer& operator = ( const Blip_Buffer& );
+public:
+	// Internal state; public because the synth templates and the BLIP_READER
+	// macros access it directly
+	typedef blip_time_t buf_t_;
+	blip_ulong factor_;             // resampled time units per source clock
+	blip_resampled_time_t offset_;  // resampled time of the current frame start
+	buf_t_* buffer_;                // sample storage
+	blip_long buffer_size_;         // number of samples buffer_ can hold, not counting the extra tail
+	blip_long reader_accum_;        // running accumulator carried between reads
+	int bass_shift_;                // shift applied by the reader's high-pass filter
+private:
+	long sample_rate_;
+	long clock_rate_;
+	int bass_freq_;
+	int length_;
+	int modified_;
+	friend class Blip_Reader;
+};
+
+#ifdef HAVE_CONFIG_H
+	#include "config.h"
+#endif
+
+// Number of bits in resample ratio fraction. Higher values give a more accurate ratio
+// but reduce maximum buffer size.
+#ifndef BLIP_BUFFER_ACCURACY
+	#define BLIP_BUFFER_ACCURACY 16
+#endif
+
+// Number of bits in phase offset. Fewer than 6 bits (64 phase offsets) results in
+// noticeable broadband noise when synthesizing high frequency square waves.
+// Affects size of Blip_Synth objects since they store the waveform directly.
+#ifndef BLIP_PHASE_BITS
+	#if BLIP_BUFFER_FAST
+		#define BLIP_PHASE_BITS 8
+	#else
+		#define BLIP_PHASE_BITS 6
+	#endif
+#endif
+
+	// Internal
+	typedef blip_ulong blip_resampled_time_t;
+	int const blip_widest_impulse_ = 16;
+	int const blip_buffer_extra_ = blip_widest_impulse_ + 2;
+	int const blip_res = 1 << BLIP_PHASE_BITS;
+	class blip_eq_t;
+	
+	// Synth implementation used by Blip_Synth when BLIP_BUFFER_FAST is set.
+	// Keeps no impulse table, so treble_eq() does nothing.
+	class Blip_Synth_Fast_ {
+	public:
+		Blip_Buffer* buf;   // output buffer
+		int last_amp;       // amplitude last passed to Blip_Synth::update()
+		int delta_factor;   // multiplier applied to each amplitude delta
+		
+		void volume_unit( double );
+		Blip_Synth_Fast_();
+		void treble_eq( blip_eq_t const& ) { }
+	};
+	
+	// Synth implementation used by Blip_Synth when BLIP_BUFFER_FAST is not set.
+	// Works on an impulse table that is owned by the enclosing Blip_Synth.
+	class Blip_Synth_ {
+	public:
+		Blip_Buffer* buf;   // output buffer
+		int last_amp;       // amplitude last passed to Blip_Synth::update()
+		int delta_factor;   // multiplier applied to each amplitude delta
+		
+		void volume_unit( double );
+		Blip_Synth_( short* impulses, int width );
+		void treble_eq( blip_eq_t const& );
+	private:
+		double volume_unit_;    // last volume unit set, 0.0 if none yet
+		short* const impulses;  // impulse table supplied to the constructor
+		int const width;        // kernel width supplied to the constructor
+		blip_long kernel_unit;  // 0 until treble_eq() has generated the kernel
+		// number of entries of the impulse table that are in use
+		int impulses_size() const { return blip_res / 2 * width + 1; }
+		void adjust_impulse();
+	};
+
+// Quality level. Start with blip_good_quality.
+const int blip_med_quality  = 8;
+const int blip_good_quality = 12;
+const int blip_high_quality = 16;
+
+// Range specifies the greatest expected change in amplitude. Calculate it
+// by finding the difference between the maximum and minimum expected
+// amplitudes (max - min).
+// Synthesizer that adds band-limited amplitude transitions to a Blip_Buffer.
+// 'quality' is the impulse width (see the blip_*_quality constants) and 'range'
+// is the greatest expected change in amplitude; its sign is ignored.
+template<int quality,int range>
+class Blip_Synth {
+public:
+	// Set overall volume of waveform
+	void volume( double v ) { impl.volume_unit( v * (1.0 / (range < 0 ? -range : range)) ); }
+	
+	// Configure low-pass filter (see blip_buffer.txt)
+	void treble_eq( blip_eq_t const& eq )       { impl.treble_eq( eq ); }
+	
+	// Get/set Blip_Buffer used for output
+	// (setting the output also resets the amplitude tracked by update())
+	Blip_Buffer* output() const                 { return impl.buf; }
+	void output( Blip_Buffer* b )               { impl.buf = b; impl.last_amp = 0; }
+	
+	// Update amplitude of waveform at given time. Using this requires a separate
+	// Blip_Synth for each waveform.
+	void update( blip_time_t time, int amplitude );
+
+// Low-level interface
+
+	// Add an amplitude transition of specified delta, optionally into specified buffer
+	// rather than the one set with output(). Delta can be positive or negative.
+	// The actual change in amplitude is delta * (volume / range)
+	void offset( blip_time_t, int delta, Blip_Buffer* ) const;
+	void offset( blip_time_t t, int delta ) const { offset( t, delta, impl.buf ); }
+	
+	// Works directly in terms of fractional output samples. Contact author for more info.
+	void offset_resampled( blip_resampled_time_t, int delta, Blip_Buffer* ) const;
+	
+	// Same as offset(), except code is inlined for higher performance
+	void offset_inline( blip_time_t t, int delta, Blip_Buffer* buf ) const {
+		offset_resampled( t * buf->factor_ + buf->offset_, delta, buf );
+	}
+	void offset_inline( blip_time_t t, int delta ) const {
+		offset_resampled( t * impl.buf->factor_ + impl.buf->offset_, delta, impl.buf );
+	}
+	
+private:
+#if BLIP_BUFFER_FAST
+	Blip_Synth_Fast_ impl;
+#else
+	Blip_Synth_ impl;
+	typedef short imp_t;
+	// impulse table handed to impl; sized to match Blip_Synth_::impulses_size()
+	imp_t impulses [blip_res * (quality / 2) + 1];
+public:
+	Blip_Synth() : impl( impulses, quality ) { }
+#endif
+};
+
+// Low-pass equalization parameters
+// Parameter set for the synth's low-pass filter. Only Blip_Synth_ (a friend)
+// can turn it into a kernel, through the private generate().
+class blip_eq_t {
+public:
+	// Logarithmic rolloff to treble dB at half sampling rate. Negative values reduce
+	// treble, small positive values (0 to 5.0) increase treble.
+	blip_eq_t( double treble_db = 0 );
+	
+	// See blip_buffer.txt
+	blip_eq_t( double treble, long rolloff_freq, long sample_rate, long cutoff_freq = 0 );
+	
+private:
+	double treble;
+	long rolloff_freq;
+	long sample_rate;
+	long cutoff_freq;   // 0 lets generate() pick a cutoff from the kernel width
+	// Writes 'count' points of the windowed half-kernel to 'out'
+	void generate( float* out, int count ) const;
+	friend class Blip_Synth_;
+};
+
+int const blip_sample_bits = 30;
+
+// Dummy Blip_Buffer to direct sound output to, for easy muting without
+// having to stop sound code.
+// Blip_Buffer backed by a small member array instead of heap storage. The
+// members redeclared below hide the base-class versions of the same name.
+class Silent_Blip_Buffer : public Blip_Buffer {
+	buf_t_ buf [blip_buffer_extra_ + 1]; // storage that buffer_ is pointed at
+public:
+	// The following cannot be used (an assertion will fail if attempted):
+	blargg_err_t set_sample_rate( long samples_per_sec, int msec_length );
+	blip_time_t count_clocks( long count ) const;
+	void mix_samples( blip_sample_t const* buf, long count );
+	
+	Silent_Blip_Buffer();
+};
+
+	#if defined (__GNUC__) || _MSC_VER >= 1100
+		#define BLIP_RESTRICT __restrict
+	#else
+		#define BLIP_RESTRICT
+	#endif
+
+// Optimized reading from Blip_Buffer, for use in custom sample output
+
+// Begin reading from buffer. Name should be unique to the current block.
+#define BLIP_READER_BEGIN( name, blip_buffer ) \
+	const Blip_Buffer::buf_t_* BLIP_RESTRICT name##_reader_buf = (blip_buffer).buffer_;\
+	blip_long name##_reader_accum = (blip_buffer).reader_accum_
+
+// Get value to pass to BLIP_READER_NEXT()
+#define BLIP_READER_BASS( blip_buffer ) ((blip_buffer).bass_shift_)
+
+// Constant value to use instead of BLIP_READER_BASS(), for slightly more optimal
+// code at the cost of having no bass control
+int const blip_reader_default_bass = 9;
+
+// Current sample
+#define BLIP_READER_READ( name )        (name##_reader_accum >> (blip_sample_bits - 16))
+
+// Current raw sample in full internal resolution
+#define BLIP_READER_READ_RAW( name )    (name##_reader_accum)
+
+// Advance to next sample
+#define BLIP_READER_NEXT( name, bass ) \
+	(void) (name##_reader_accum += *name##_reader_buf++ - (name##_reader_accum >> (bass)))
+
+// End reading samples from buffer. The number of samples read must now be removed
+// using Blip_Buffer::remove_samples().
+#define BLIP_READER_END( name, blip_buffer ) \
+	(void) ((blip_buffer).reader_accum_ = name##_reader_accum)
+
+
+// Compatibility with older version
+const long blip_unscaled = 65535;
+const int blip_low_quality  = blip_med_quality;
+const int blip_best_quality = blip_high_quality;
+
+// Deprecated; use BLIP_READER macros as follows:
+// Blip_Reader r; r.begin( buf ); -> BLIP_READER_BEGIN( r, buf );
+// int bass = r.begin( buf )      -> BLIP_READER_BEGIN( r, buf ); int bass = BLIP_READER_BASS( buf );
+// r.read()                       -> BLIP_READER_READ( r )
+// r.read_raw()                   -> BLIP_READER_READ_RAW( r )
+// r.next( bass )                 -> BLIP_READER_NEXT( r, bass )
+// r.next()                       -> BLIP_READER_NEXT( r, blip_reader_default_bass )
+// r.end( buf )                   -> BLIP_READER_END( r, buf )
+// Object form of the BLIP_READER_* macros: steps through a Blip_Buffer's raw
+// storage while keeping a running accumulator.
+class Blip_Reader {
+public:
+	// Start reading from the buffer; returns its bass shift for use with next()
+	int begin( Blip_Buffer& );
+	// Current sample, scaled down to 16-bit range (not clamped)
+	blip_long read() const          { return accum >> (blip_sample_bits - 16); }
+	// Current sample at full internal resolution
+	blip_long read_raw() const      { return accum; }
+	// Advance to next sample, applying the high-pass filter
+	void next( int bass_shift = 9 )         { accum += *buf++ - (accum >> bass_shift); }
+	// Store the accumulator back into the buffer when done reading
+	void end( Blip_Buffer& b )              { b.reader_accum_ = accum; }
+	
+private:
+	const Blip_Buffer::buf_t_* buf;
+	blip_long accum;
+};
+
+// End of public interface
+
+#include <assert.h>
+
+// Adds an amplitude transition of 'delta' (scaled by the synth's volume factor)
+// to 'blip_buf' at resampled time 'time'. The upper bits of 'time' select the
+// sample and the next BLIP_PHASE_BITS bits select the sub-sample phase. In fast
+// mode the delta is split across two adjacent samples; otherwise an impulse of
+// 'quality' entries taken from the impulse table is added.
+template<int quality,int range>
+inline void Blip_Synth<quality,range>::offset_resampled( blip_resampled_time_t time,
+		int delta, Blip_Buffer* blip_buf ) const
+{
+	// Fails if time is beyond end of Blip_Buffer, due to a bug in caller code or the
+	// need for a longer buffer as set by set_sample_rate().
+	assert( (blip_long) (time >> BLIP_BUFFER_ACCURACY) < blip_buf->buffer_size_ );
+	delta *= impl.delta_factor;
+	blip_long* BLIP_RESTRICT buf = blip_buf->buffer_ + (time >> BLIP_BUFFER_ACCURACY);
+	int phase = (int) (time >> (BLIP_BUFFER_ACCURACY - BLIP_PHASE_BITS) & (blip_res - 1));
+
+#if BLIP_BUFFER_FAST
+	blip_long left = buf [0] + delta;
+	
+	// Kind of crappy, but doing shift after multiply results in overflow.
+	// Alternate way of delaying multiply by delta_factor results in worse
+	// sub-sample resolution.
+	blip_long right = (delta >> BLIP_PHASE_BITS) * phase;
+	left  -= right;
+	right += buf [1];
+	
+	buf [0] = left;
+	buf [1] = right;
+#else
+
+	// narrower impulses are centered within the widest impulse's span
+	int const fwd = (blip_widest_impulse_ - quality) / 2;
+	int const rev = fwd + quality - 2;
+	int const mid = quality / 2 - 1;
+	
+	// first half reads the table at (blip_res - phase); the second half
+	// switches to (phase) and walks back down
+	imp_t const* BLIP_RESTRICT imp = impulses + blip_res - phase;
+	
+	#if defined (_M_IX86) || defined (_M_IA64) || defined (__i486__) || \
+			defined (__x86_64__) || defined (__ia64__) || defined (__i386__)
+	
+	// straight forward implementation resulted in better code on GCC for x86
+	
+	#define ADD_IMP( out, in ) \
+		buf [out] += (blip_long) imp [blip_res * (in)] * delta
+	
+	#define BLIP_FWD( i ) {\
+		ADD_IMP( fwd     + i, i     );\
+		ADD_IMP( fwd + 1 + i, i + 1 );\
+	}
+	#define BLIP_REV( r ) {\
+		ADD_IMP( rev     - r, r + 1 );\
+		ADD_IMP( rev + 1 - r, r     );\
+	}
+
+		BLIP_FWD( 0 )
+		if ( quality > 8  ) BLIP_FWD( 2 )
+		if ( quality > 12 ) BLIP_FWD( 4 )
+		{
+			ADD_IMP( fwd + mid - 1, mid - 1 );
+			ADD_IMP( fwd + mid    , mid     );
+			imp = impulses + phase;
+		}
+		if ( quality > 12 ) BLIP_REV( 6 )
+		if ( quality > 8  ) BLIP_REV( 4 )
+		BLIP_REV( 2 )
+		
+		ADD_IMP( rev    , 1 );
+		ADD_IMP( rev + 1, 0 );
+		
+	#else
+	
+	// for RISC processors, help compiler by reading ahead of writes
+	
+	#define BLIP_FWD( i ) {\
+		blip_long t0 =                       i0 * delta + buf [fwd     + i];\
+		blip_long t1 = imp [blip_res * (i + 1)] * delta + buf [fwd + 1 + i];\
+		i0 =           imp [blip_res * (i + 2)];\
+		buf [fwd     + i] = t0;\
+		buf [fwd + 1 + i] = t1;\
+	}
+	#define BLIP_REV( r ) {\
+		blip_long t0 =                 i0 * delta + buf [rev     - r];\
+		blip_long t1 = imp [blip_res * r] * delta + buf [rev + 1 - r];\
+		i0 =           imp [blip_res * (r - 1)];\
+		buf [rev     - r] = t0;\
+		buf [rev + 1 - r] = t1;\
+	}
+		
+		blip_long i0 = *imp;
+		BLIP_FWD( 0 )
+		if ( quality > 8  ) BLIP_FWD( 2 )
+		if ( quality > 12 ) BLIP_FWD( 4 )
+		{
+			blip_long t0 =                   i0 * delta + buf [fwd + mid - 1];
+			blip_long t1 = imp [blip_res * mid] * delta + buf [fwd + mid    ];
+			imp = impulses + phase;
+			i0 = imp [blip_res * mid];
+			buf [fwd + mid - 1] = t0;
+			buf [fwd + mid    ] = t1;
+		}
+		if ( quality > 12 ) BLIP_REV( 6 )
+		if ( quality > 8  ) BLIP_REV( 4 )
+		BLIP_REV( 2 )
+		
+		blip_long t0 =   i0 * delta + buf [rev    ];
+		blip_long t1 = *imp * delta + buf [rev + 1];
+		buf [rev    ] = t0;
+		buf [rev + 1] = t1;
+	#endif
+	
+#endif
+}
+
+// The helper macros are private to offset_resampled(); keep them from leaking
+// into every file that includes this header.
+#undef ADD_IMP
+#undef BLIP_FWD
+#undef BLIP_REV
+
+// Adds an amplitude transition of 'delta' at clock time 't' into 'buf', after
+// converting the time to the buffer's resampled time scale.
+template<int quality,int range>
+#if BLIP_BUFFER_FAST
+	inline
+#endif
+void Blip_Synth<quality,range>::offset( blip_time_t t, int delta, Blip_Buffer* buf ) const
+{
+	blip_resampled_time_t const when = t * buf->factor_ + buf->offset_;
+	offset_resampled( when, delta, buf );
+}
+
+// Moves the waveform to amplitude 'amp' at clock time 't' by adding the
+// difference from the previously set amplitude to the output buffer.
+template<int quality,int range>
+#if BLIP_BUFFER_FAST
+	inline
+#endif
+void Blip_Synth<quality,range>::update( blip_time_t t, int amp )
+{
+	Blip_Buffer* const out = impl.buf;
+	int const change = amp - impl.last_amp;
+	impl.last_amp = amp;
+	offset_resampled( t * out->factor_ + out->offset_, change, out );
+}
+
+// Treble-only form: rolloff and cutoff frequencies are 0 and the sample rate is 44100
+inline blip_eq_t::blip_eq_t( double t ) :
+		treble( t ), rolloff_freq( 0 ), sample_rate( 44100 ), cutoff_freq( 0 ) { }
+// Full form: stores all four parameters as given
+inline blip_eq_t::blip_eq_t( double t, long rf, long sr, long cf ) :
+		treble( t ), rolloff_freq( rf ), sample_rate( sr ), cutoff_freq( cf ) { }
+
+// Inline accessors for Blip_Buffer.
+// samples_avail() is the whole-sample part of the fixed-point offset_.
+// output_latency() is half the widest impulse width.
+// clock_rate( cps ) stores the new rate and recomputes factor_ from it.
+inline int  Blip_Buffer::length() const         { return length_; }
+inline long Blip_Buffer::samples_avail() const  { return (long) (offset_ >> BLIP_BUFFER_ACCURACY); }
+inline long Blip_Buffer::sample_rate() const    { return sample_rate_; }
+inline int  Blip_Buffer::output_latency() const { return blip_widest_impulse_ / 2; }
+inline long Blip_Buffer::clock_rate() const     { return clock_rate_; }
+inline void Blip_Buffer::clock_rate( long cps ) { factor_ = clock_rate_factor( clock_rate_ = cps ); }
+
+// Points the reader at the start of the buffer's storage, picks up its saved
+// accumulator, and returns the buffer's bass shift.
+inline int Blip_Reader::begin( Blip_Buffer& blip_buf )
+{
+	int const bass = blip_buf.bass_shift_;
+	accum = blip_buf.reader_accum_;
+	buf   = blip_buf.buffer_;
+	return bass;
+}
+
+int const blip_max_length = 0;
+int const blip_default_length = 250;
+
+#endif
diff --git a/libraries/game-music-emu/gme/CMakeLists.txt b/libraries/game-music-emu/gme/CMakeLists.txt
new file mode 100644
index 000000000..5c37ebd29
--- /dev/null
+++ b/libraries/game-music-emu/gme/CMakeLists.txt
@@ -0,0 +1,204 @@
+# List of source files required by libgme and any emulators
+# This is not 100% accurate (Fir_Resampler for instance) but
+# you'll be OK.
+set(libgme_SRCS Blip_Buffer.cpp
+                Classic_Emu.cpp
+                Data_Reader.cpp
+                Dual_Resampler.cpp
+                Effects_Buffer.cpp
+                Fir_Resampler.cpp
+                gme.cpp
+                Gme_File.cpp
+                M3u_Playlist.cpp
+                Multi_Buffer.cpp
+                Music_Emu.cpp
+                )
+
+# Static builds need static zlib (and static forms of other needed libraries),
+# so restrict the search to the platform's static-library suffix (".a" on
+# Unix-likes, ".lib" with MSVC).  See https://stackoverflow.com/a/44738756
+if(NOT BUILD_SHARED_LIBS)
+    set(CMAKE_FIND_LIBRARY_SUFFIXES "${CMAKE_STATIC_LIBRARY_SUFFIX}")
+endif()
+
+find_package(ZLIB QUIET)
+
+# Ay_Apu is very popular around here
+if (USE_GME_AY OR USE_GME_KSS)
+    set(libgme_SRCS ${libgme_SRCS}
+                Ay_Apu.cpp
+        )
+endif()
+
+# so is Ym2612_Emu
+if (USE_GME_VGM OR USE_GME_GYM)
+    if(GME_YM2612_EMU STREQUAL "Nuked")
+        add_definitions(-DVGM_YM2612_NUKED)
+        set(libgme_SRCS ${libgme_SRCS}
+                    Ym2612_Nuked.cpp
+            )
+        message("VGM/GYM: Nuked OPN2 emulator will be used")
+    elseif(GME_YM2612_EMU STREQUAL "MAME")
+        add_definitions(-DVGM_YM2612_MAME)
+        set(libgme_SRCS ${libgme_SRCS}
+                    Ym2612_MAME.cpp
+            )
+        message("VGM/GYM: MAME YM2612 emulator will be used")
+    else()
+        add_definitions(-DVGM_YM2612_GENS)
+        set(libgme_SRCS ${libgme_SRCS}
+                    Ym2612_GENS.cpp
+            )
+        message("VGM/GYM: GENS 2.10 emulator will be used")
+    endif()
+endif()
+
+# But none are as popular as Sms_Apu
+if (USE_GME_VGM OR USE_GME_GYM OR USE_GME_KSS)
+    set(libgme_SRCS ${libgme_SRCS}
+                Sms_Apu.cpp
+        )
+endif()
+
+if (USE_GME_AY)
+    set(libgme_SRCS ${libgme_SRCS}
+              # Ay_Apu.cpp included earlier
+                Ay_Cpu.cpp
+                Ay_Emu.cpp
+        )
+endif()
+
+if (USE_GME_GBS)
+    set(libgme_SRCS ${libgme_SRCS}
+                Gb_Apu.cpp
+                Gb_Cpu.cpp
+                Gb_Oscs.cpp
+                Gbs_Emu.cpp
+        )
+endif()
+
+if (USE_GME_GYM)
+    set(libgme_SRCS ${libgme_SRCS}
+              # Sms_Apu.cpp included earlier
+              # Ym2612_Emu.cpp included earlier
+                Gym_Emu.cpp
+        )
+endif()
+
+if (USE_GME_HES)
+    set(libgme_SRCS ${libgme_SRCS}
+                Hes_Apu.cpp
+                Hes_Cpu.cpp
+                Hes_Emu.cpp
+        )
+endif()
+
+if (USE_GME_KSS)
+    set(libgme_SRCS ${libgme_SRCS}
+              # Ay_Apu.cpp included earlier
+              # Sms_Apu.cpp included earlier
+                Kss_Cpu.cpp
+                Kss_Emu.cpp
+                Kss_Scc_Apu.cpp
+        )
+endif()
+
+if (USE_GME_NSF OR USE_GME_NSFE)
+    set(libgme_SRCS ${libgme_SRCS}
+                Nes_Apu.cpp
+                Nes_Cpu.cpp
+                Nes_Fme7_Apu.cpp
+                Nes_Namco_Apu.cpp
+                Nes_Oscs.cpp
+                Nes_Vrc6_Apu.cpp
+                Nsf_Emu.cpp
+        )
+endif()
+
+if (USE_GME_NSFE)
+    set(libgme_SRCS ${libgme_SRCS}
+                Nsfe_Emu.cpp
+        )
+endif()
+
+if (USE_GME_SAP)
+    set(libgme_SRCS ${libgme_SRCS}
+                Sap_Apu.cpp
+                Sap_Cpu.cpp
+                Sap_Emu.cpp
+        )
+endif()
+
+if (USE_GME_SPC)
+    set(libgme_SRCS ${libgme_SRCS}
+                Snes_Spc.cpp
+                Spc_Cpu.cpp
+                Spc_Dsp.cpp
+                Spc_Emu.cpp
+                Spc_Filter.cpp
+        )
+endif()
+
+if (USE_GME_VGM)
+    set(libgme_SRCS ${libgme_SRCS}
+              # Sms_Apu.cpp included earlier
+              # Ym2612_Emu.cpp included earlier
+                Vgm_Emu.cpp
+                Vgm_Emu_Impl.cpp
+                Ym2413_Emu.cpp
+        )
+endif()
+
+# These headers are part of the generic gme interface.
+set (EXPORTED_HEADERS gme.h)
+
+# On some platforms we may need to change headers or whatnot based on whether
+# we're building the library or merely using the library. The following is
+# only defined when building the library to allow us to tell which is which.
+
+#[ZDoom] Not needed
+#add_definitions(-DBLARGG_BUILD_DLL)
+
+# For the gme_types.h
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
+# Add library to be compiled.
+add_library(gme STATIC ${libgme_SRCS})
+
+if(ZLIB_FOUND)
+    message(" ** ZLib library located, compressed file formats will be supported")
+    target_compile_definitions(gme PRIVATE -DHAVE_ZLIB_H)
+    target_include_directories(gme PRIVATE ${ZLIB_INCLUDE_DIRS})
+    target_link_libraries(gme ${ZLIB_LIBRARIES})
+    # Is not to be installed though
+
+    set(PKG_CONFIG_ZLIB -lz) # evaluated in libgme.pc.in
+else()
+    message("ZLib library not found, disabling support for compressed formats such as VGZ")
+endif()
+
+# [ZDoom] Not needed.
+if( FALSE )
+# The version is the release.  The "soversion" is the API version.  As long
+# as only build fixes are performed (i.e. no backwards-incompatible changes
+# to the API), the SOVERSION should be the same even when bumping up VERSION.
+# The way gme.h is designed, SOVERSION should very rarely be bumped, if ever.
+# Hopefully the API can stay compatible with old versions.
+set_target_properties(gme
+    PROPERTIES VERSION ${GME_VERSION}
+               SOVERSION 0)
+
+install(TARGETS gme LIBRARY DESTINATION lib${LIB_SUFFIX}
+                    RUNTIME DESTINATION bin  # DLL platforms
+                    ARCHIVE DESTINATION lib) # DLL platforms
+
+# Run during cmake phase, so this is available during make
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/gme_types.h.in
+    ${CMAKE_CURRENT_BINARY_DIR}/gme_types.h)
+
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libgme.pc.in
+    ${CMAKE_CURRENT_BINARY_DIR}/libgme.pc @ONLY)
+
+install(FILES ${EXPORTED_HEADERS} DESTINATION include/gme)
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgme.pc DESTINATION lib${LIB_SUFFIX}/pkgconfig)
+endif()
diff --git a/libraries/game-music-emu/gme/Classic_Emu.cpp b/libraries/game-music-emu/gme/Classic_Emu.cpp
new file mode 100644
index 000000000..c572d9b5c
--- /dev/null
+++ b/libraries/game-music-emu/gme/Classic_Emu.cpp
@@ -0,0 +1,190 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Classic_Emu.h"
+
+#include "Multi_Buffer.h"
+#include <string.h>
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+Classic_Emu::Classic_Emu()
+{
+	buf = stereo_buffer = 0;  // no buffer until set_buffer() or set_sample_rate_()
+	voice_types         = 0;
+	clock_rate_         = 0;  // real values arrive through setup_buffer(); zeroed so
+	buf_changed_count   = 0;  // that play_() can never read an indeterminate value
+	// avoid inconsistency in our duplicated constants
+	assert( (int) wave_type  == (int) Multi_Buffer::wave_type );
+	assert( (int) noise_type == (int) Multi_Buffer::noise_type );
+	assert( (int) mixed_type == (int) Multi_Buffer::mixed_type );
+}
+
+Classic_Emu::~Classic_Emu()
+{
+	delete stereo_buffer;
+}
+
+void Classic_Emu::set_equalizer_( equalizer_t const& eq )
+{ // Passes treble to the derived emulator and bass to the buffer, if one exists yet
+	Music_Emu::set_equalizer_( eq );
+	update_eq( eq.treble );
+	if ( buf ) // buf stays null until set_buffer() or set_sample_rate_() has run
+		buf->bass_freq( (int) equalizer().bass );
+}
+	
+blargg_err_t Classic_Emu::set_sample_rate_( long rate )
+{
+	if ( !buf ) // no buffer supplied through set_buffer(): fall back to an owned Stereo_Buffer
+	{
+		if ( !stereo_buffer )
+			CHECK_ALLOC( stereo_buffer = BLARGG_NEW Stereo_Buffer );
+		buf = stereo_buffer;
+	}
+	return buf->set_sample_rate( rate, 1000 / 20 ); // second argument is 50; presumably a length in msec (1/20 s) - confirm in Multi_Buffer
+}
+
+blargg_err_t Classic_Emu::set_multi_channel( bool is_enabled )
+{
+	// Nothing to add at this level: the base-class result is the result.
+	return Music_Emu::set_multi_channel_( is_enabled );
+}
+
+void Classic_Emu::mute_voices_( int mask )
+{
+	Music_Emu::mute_voices_( mask );
+	// Last voice to first: detach muted voices, (re)attach the others
+	for ( int voice = voice_count() - 1; voice >= 0; --voice )
+	{
+		if ( mask & (1 << voice) )
+		{
+			set_voice( voice, 0, 0, 0 ); // muted: no output buffers
+			continue;
+		}
+		int const type = voice_types ? voice_types [voice] : 0;
+		Multi_Buffer::channel_t ch = buf->channel( voice, type );
+		assert( (ch.center && ch.left && ch.right) ||
+				(!ch.center && !ch.left && !ch.right) ); // all or nothing
+		set_voice( voice, ch.center, ch.left, ch.right );
+	}
+}
+
+void Classic_Emu::change_clock_rate( long rate )
+{
+	clock_rate_ = rate;
+	buf->clock_rate( rate );
+}
+
+blargg_err_t Classic_Emu::setup_buffer( long rate )
+{ // Configures buf for this emulator: clock rate, one channel per voice, current equalizer
+	change_clock_rate( rate );
+	RETURN_ERR( buf->set_channel_count( voice_count() ) );
+	set_equalizer( equalizer() ); // re-apply the current settings now that buf is configured
+	buf_changed_count = buf->channels_changed_count(); // baseline that play_() compares against
+	return 0;
+}
+
+blargg_err_t Classic_Emu::start_track_( int track )
+{
+	RETURN_ERR( Music_Emu::start_track_( track ) );
+	buf->clear();
+	return 0;
+}
+
+blargg_err_t Classic_Emu::play_( long count, sample_t* out )
+{ // Fills out[0..count) by draining buf, emulating another frame whenever it runs dry
+	long remain = count; // samples still owed to the caller
+	while ( remain )
+	{
+		remain -= buf->read_samples( &out [count - remain], remain );
+		if ( remain )
+		{
+			if ( buf_changed_count != buf->channels_changed_count() )
+			{ // the buffer reports a channel change: re-apply voice muting
+				buf_changed_count = buf->channels_changed_count();
+				remute_voices();
+			}
+			int msec = buf->length();
+			blip_time_t clocks_emulated = (blargg_long) msec * clock_rate_ / 1000; // clocks spanned by msec milliseconds
+			RETURN_ERR( run_clocks( clocks_emulated, msec ) ); // takes clocks_emulated by reference and may change it
+			assert( clocks_emulated );
+			buf->end_frame( clocks_emulated );
+		}
+	}
+	return 0;
+}
+
+// Rom_Data
+
+blargg_err_t Rom_Data_::load_rom_data_( Data_Reader& in,
+		int header_size, void* header_out, int fill, long pad_size )
+{
+	long file_offset = pad_size - header_size; // offset in rom at which the file (header included) is stored
+	// Reset the mapping state and drop any previously loaded data
+	rom_addr = 0;
+	mask     = 0;
+	size_    = 0;
+	rom.clear();
+	
+	file_size_ = in.remain();
+	if ( file_size_ <= header_size ) // <= because there must be data after header
+		return gme_wrong_file_type;
+	blargg_err_t err = rom.resize( file_offset + file_size_ + pad_size );
+	if ( !err )
+		err = in.read( rom.begin() + file_offset, file_size_ );
+	if ( err )
+	{
+		rom.clear();
+		return err;
+	}
+	// From here on file_size_ counts only the data that follows the header
+	file_size_ -= header_size;
+	memcpy( header_out, &rom [file_offset], header_size );
+	// Fill both pads; given header_size <= pad_size, the leading fill also overwrites the header bytes copied out above
+	memset( rom.begin()         , fill, pad_size );
+	memset( rom.end() - pad_size, fill, pad_size );
+	
+	return 0;
+}
+
+void Rom_Data_::set_addr_( long addr, int unit )
+{
+	rom_addr = addr - unit - pad_extra;
+	// End address of the data (addr + file_size_), rounded up to a multiple of unit
+	long rounded = (addr + file_size_ + unit - 1) / unit * unit;
+	if ( rounded <= 0 )
+	{
+		rounded = 0; // mask is left as it was on this path
+	}
+	else
+	{
+		int shift = 0;
+		unsigned long max_addr = (unsigned long) (rounded - 1);
+		while ( max_addr >> shift ) // count the significant bits of the highest address
+			shift++;
+		mask = (1L << shift) - 1; // all-ones mask of that width
+	}
+	
+	if ( addr < 0 )
+		addr = 0; // only affects the disabled diagnostics below
+	size_ = rounded;
+	if ( rom.resize( rounded - rom_addr + pad_extra ) ) { } // OK if shrink fails
+
+	if ( 0 ) // diagnostics, compiled but never executed
+	{
+		debug_printf( "addr: %X\n", addr );
+		debug_printf( "file_size: %d\n", file_size_ );
+		debug_printf( "rounded: %d\n", rounded );
+		debug_printf( "mask: $%X\n", mask );
+	}
+}
diff --git a/libraries/game-music-emu/gme/Classic_Emu.h b/libraries/game-music-emu/gme/Classic_Emu.h
new file mode 100644
index 000000000..57cdd5c32
--- /dev/null
+++ b/libraries/game-music-emu/gme/Classic_Emu.h
@@ -0,0 +1,128 @@
+// Common aspects of emulators which use Blip_Buffer for sound output
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef CLASSIC_EMU_H
+#define CLASSIC_EMU_H
+
+#include "blargg_common.h"
+#include "Blip_Buffer.h"
+#include "Music_Emu.h"
+
+class Classic_Emu : public Music_Emu {
+public:
+	Classic_Emu();
+	~Classic_Emu();
+	void set_buffer( Multi_Buffer* ); // install a custom buffer; asserts that none is set yet
+	blargg_err_t set_multi_channel( bool is_enabled ) override;
+protected:
+	// Services
+	enum { wave_type = 0x100, noise_type = 0x200, mixed_type = wave_type | noise_type }; // asserted equal to Multi_Buffer's in the constructor
+	void set_voice_types( int const* t ) { voice_types = t; } // stores the pointer; the array is not copied
+	blargg_err_t setup_buffer( long clock_rate );
+	long clock_rate() const { return clock_rate_; }
+	void change_clock_rate( long ); // experimental
+	
+	// Overridable
+	virtual void set_voice( int index, Blip_Buffer* center,
+			Blip_Buffer* left, Blip_Buffer* right ) = 0; // called with three null buffers for a muted voice
+	virtual void update_eq( blip_eq_t const& ) = 0;
+	virtual blargg_err_t start_track_( int track ) = 0;
+	virtual blargg_err_t run_clocks( blip_time_t& time_io, int msec ) = 0; // time_io is then handed to the buffer's end_frame()
+protected:
+	blargg_err_t set_sample_rate_( long sample_rate );
+	void mute_voices_( int );
+	void set_equalizer_( equalizer_t const& );
+	blargg_err_t play_( long, sample_t* );
+private:
+	Multi_Buffer* buf; // buffer in use: stereo_buffer or the one given to set_buffer()
+	Multi_Buffer* stereo_buffer; // NULL if using custom buffer; deleted in the destructor
+	long clock_rate_;
+	unsigned buf_changed_count; // last channels_changed_count() seen from buf
+	int const* voice_types; // optional per-voice type values passed to buf->channel(); may be null
+};
+
+inline void Classic_Emu::set_buffer( Multi_Buffer* new_buf )
+{
+	assert( !buf && new_buf );
+	buf = new_buf;
+}
+
+// ROM data handler, used by several Classic_Emu derivatives. Loads file data
+// with padding on both sides, allowing direct use in bank mapping. The main purpose
+// is to allow all file data to be loaded with only one read() call (for efficiency).
+
+class Rom_Data_ {
+public:
+	typedef unsigned char byte;
+protected:
+	enum { pad_extra = 8 };
+	blargg_vector<byte> rom;
+	long file_size_;
+	blargg_long rom_addr;
+	blargg_long mask;
+	blargg_long size_; // TODO: eliminate
+	
+	blargg_err_t load_rom_data_( Data_Reader& in, int header_size, void* header_out,
+			int fill, long pad_size );
+	void set_addr_( long addr, int unit );
+};
+
+template<int unit>
+class Rom_Data : public Rom_Data_ {
+	enum { pad_size = unit + pad_extra }; // bytes of padding kept on each side of the file data
+public:
+	// Load file data from 'in', copy its first header_size bytes into *header_out
+	// and fill the padding (unmapped) bytes with 'fill'.
+	blargg_err_t load( Data_Reader& in, int header_size, void* header_out, int fill )
+	{
+		return load_rom_data_( in, header_size, header_out, fill, pad_size );
+	}
+	
+	// Size of file data read in (excluding header)
+	long file_size() const { return file_size_; }
+	
+	// Pointer to beginning of file data (the first byte after the header)
+	byte* begin() const { return rom.begin() + pad_size; }
+	
+	// Set address that file data should start at
+	void set_addr( long addr ) { set_addr_( addr, unit ); }
+	
+	// Free data
+	void clear() { rom.clear(); }
+	
+	// Size of data + start addr, rounded to a multiple of unit
+	long size() const { return size_; }
+	
+	// Pointer to unmapped page filled with same value
+	byte* unmapped() { return rom.begin(); }
+	
+	// Mask address to nearest power of two greater than size()
+	blargg_long mask_addr( blargg_long addr ) const
+	{
+		#ifdef check
+			check( addr <= mask );
+		#endif
+		return addr & mask;
+	}
+	
+	// Pointer to page starting at addr. Returns unmapped() if outside data.
+	byte* at_addr( blargg_long addr )
+	{
+		blargg_ulong offset = mask_addr( addr ) - rom_addr; // unsigned: addresses below rom_addr wrap to huge values
+		if ( offset > blargg_ulong (rom.size() - pad_size) )
+			offset = 0; // unmapped
+		return &rom [offset];
+	}
+};
+
+#ifndef GME_APU_HOOK
+	#define GME_APU_HOOK( emu, addr, data ) ((void) 0)
+#endif
+
+#ifndef GME_FRAME_HOOK
+	#define GME_FRAME_HOOK( emu ) ((void) 0)
+#else
+	#define GME_FRAME_HOOK_DEFINED 1
+#endif
+
+#endif
diff --git a/libraries/game-music-emu/gme/Data_Reader.cpp b/libraries/game-music-emu/gme/Data_Reader.cpp
new file mode 100644
index 000000000..1556c329f
--- /dev/null
+++ b/libraries/game-music-emu/gme/Data_Reader.cpp
@@ -0,0 +1,449 @@
+// File_Extractor 0.4.0. http://www.slack.net/~ant/
+
+#include "Data_Reader.h"
+
+#include "blargg_endian.h"
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+
+/* Copyright (C) 2005-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+#ifdef HAVE_ZLIB_H
+#include <zlib.h>
+#include <stdlib.h>
+#include <errno.h>
+static const unsigned char gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
+#endif /* HAVE_ZLIB_H */
+
+const char Data_Reader::eof_error [] = "Unexpected end of file";
+
+#define RETURN_VALIDITY_CHECK( cond ) \
+	do { if ( unlikely( !(cond) ) ) return "Corrupt file"; } while(0)
+
+blargg_err_t Data_Reader::read( void* p, long s )
+{
+	// An exact read of zero or fewer bytes is rejected as a corrupt-file request
+	RETURN_VALIDITY_CHECK( s > 0 );
+
+	long const got = read_avail( p, s );
+	if ( got == s )
+		return 0;
+
+	// A short count means the data ran out; a negative count, or more than
+	// was asked for, is reported as a generic failure.
+	if ( got < 0 || got > s )
+		return "Read error";
+	return eof_error;
+}
+
+blargg_err_t Data_Reader::skip( long count )
+{ // Generic skip for readers that cannot seek: reads and discards the bytes
+	RETURN_VALIDITY_CHECK( count >= 0 );
+
+	char buf [512]; // scratch space; contents are thrown away
+	while ( count )
+	{
+		long n = sizeof buf; // at most one scratch buffer per pass
+		if ( n > count )
+			n = count;
+		count -= n;
+		RETURN_ERR( read( buf, n ) );
+	}
+	return 0;
+}
+
+long File_Reader::remain() const { return size() - tell(); }
+
+blargg_err_t File_Reader::skip( long n )
+{
+	RETURN_VALIDITY_CHECK( n >= 0 );
+
+	if ( !n )
+		return 0;
+	return seek( tell() + n );
+}
+
+// Subset_Reader
+
+Subset_Reader::Subset_Reader( Data_Reader* dr, long size )
+{ // Exposes at most 'size' bytes of dr; the pointer is stored, not owned
+	in = dr;
+	remain_ = dr->remain();
+	if ( remain_ > size ) // cap at the requested size, never below zero
+		remain_ = max( 0l, size );
+}
+
+long Subset_Reader::remain() const { return remain_; }
+
+long Subset_Reader::read_avail( void* p, long s )
+{
+	s = max( 0l, s ); // negative requests are treated as zero
+	if ( s > remain_ )
+		s = remain_;
+	remain_ -= s; // charged up front, whatever the underlying reader then returns
+	return in->read_avail( p, s );
+}
+
+// Remaining_Reader
+
+Remaining_Reader::Remaining_Reader( void const* h, long size, Data_Reader* r )
+{
+	header = (char const*) h;
+	header_end = header + max( 0l, size );
+	in = r;
+}
+
+long Remaining_Reader::remain() const { return header_end - header + in->remain(); }
+
+long Remaining_Reader::read_first( void* out, long count )
+{ // Copies up to count bytes of the not-yet-consumed header into out; returns how many
+	count = max( 0l, count );
+	long first = header_end - header; // header bytes still unread
+	if ( first )
+	{
+		if ( first > count || first < 0 )
+			first = count;
+		void const* old = header;
+		header += first; // consume them
+		memcpy( out, old, (size_t) first );
+	}
+	return first;
+}
+
+long Remaining_Reader::read_avail( void* out, long count )
+{ // Serves the request from the saved header first, then from the underlying reader
+	count = max( 0l, count );
+	long first = read_first( out, count );
+	long second = max( 0l, count - first ); // part the header could not satisfy
+	if ( second )
+	{
+		second = in->read_avail( (char*) out + first, second );
+		if ( second <= 0 ) // NOTE(review): header bytes already copied are not counted on this path
+			return second;
+	}
+	return first + second;
+}
+
+blargg_err_t Remaining_Reader::read( void* out, long count )
+{
+	count = max( 0l, count );
+	long first = read_first( out, count );
+	long second = max( 0l, count - first );
+	if ( !second )
+		return 0;
+	return in->read( (char*) out + first, second );
+}
+
+// Mem_File_Reader
+
+Mem_File_Reader::Mem_File_Reader( const void* p, long s ) :
+	m_begin( (const char*) p ),
+	m_size( max( 0l, s ) ), // negative sizes are treated as empty
+	m_pos( 0l )
+{
+#ifdef HAVE_ZLIB_H
+	if( !m_begin ) // nothing to inspect
+		return;
+
+	if ( gz_decompress() ) // on success m_begin/m_size now describe a malloc'd inflated copy
+	{
+		debug_printf( "Loaded compressed data\n" );
+		m_ownedPtr = true; // tells the destructor to free m_begin
+	}
+#endif /* HAVE_ZLIB_H */
+}
+
+#ifdef HAVE_ZLIB_H
+Mem_File_Reader::~Mem_File_Reader()
+{ // Caller-supplied memory is never freed; only a buffer this reader allocated is
+	if ( m_ownedPtr )
+		free( const_cast<char*>( m_begin ) ); // see gz_decompress for the malloc
+}
+#endif
+
+long Mem_File_Reader::size() const { return m_size; }
+
+long Mem_File_Reader::read_avail( void* p, long s )
+{ // Copies up to s bytes from the current position and advances it; returns the count copied
+	long r = remain();
+	if ( s > r || s < 0 ) // oversized and negative requests both become "everything left"
+		s = r;
+	memcpy( p, m_begin + m_pos, static_cast<size_t>(s) );
+	m_pos += s;
+	return s;
+}
+
+long Mem_File_Reader::tell() const { return m_pos; }
+
+blargg_err_t Mem_File_Reader::seek( long n )
+{
+	RETURN_VALIDITY_CHECK( n >= 0 );
+	if ( n > m_size )
+		return eof_error;
+	m_pos = n;
+	return 0;
+}
+
+#ifdef HAVE_ZLIB_H
+
+// If the data is gzip-compressed, swaps m_begin/m_size for a malloc'd inflated
+// copy and returns true; otherwise returns false and leaves them untouched.
+bool Mem_File_Reader::gz_decompress()
+{
+	// Too short to hold the 2-byte gzip magic, or magic absent: not gzip data
+	if ( m_size < 2 || memcmp(m_begin, gz_magic, 2) != 0 )
+		return false;
+
+	const size_t full_length = static_cast<size_t>( m_size );
+	const size_t half_length = static_cast<size_t>( m_size / 2 );
+
+	// We use malloc/friends here so we can realloc to grow buffer if needed
+	char *raw_data = reinterpret_cast<char *> ( malloc( full_length ) );
+	size_t raw_data_size = full_length;
+	if ( !raw_data )
+		return false;
+
+	z_stream strm;
+	strm.next_in   = const_cast<Bytef *>( reinterpret_cast<const Bytef *>( m_begin ) );
+	strm.avail_in  = static_cast<uInt>( m_size );
+	strm.total_out = 0;
+	strm.zalloc    = Z_NULL;
+	strm.zfree     = Z_NULL;
+	strm.opaque    = Z_NULL; // zlib expects zalloc, zfree and opaque all initialized
+
+	// windowBits of 16 + MAX_WBITS makes zlib decode the gzip wrapper
+	if ( inflateInit2(&strm, (16 + MAX_WBITS)) != Z_OK )
+	{
+		free( raw_data );
+		return false;
+	}
+
+	bool done = false;
+	while ( !done )
+	{
+		if ( strm.total_out >= raw_data_size )
+		{
+			// Output full: grow via a temporary, as a failed realloc keeps the old block
+			void *grown = realloc( raw_data, raw_data_size + half_length );
+			if ( !grown ) {
+				inflateEnd( &strm );
+				free( raw_data );
+				return false;
+			}
+			raw_data = static_cast<char *>( grown );
+			raw_data_size += half_length;
+		}
+
+		strm.next_out  = reinterpret_cast<Bytef *>( raw_data + strm.total_out );
+		strm.avail_out = static_cast<uInt>( static_cast<uLong>( raw_data_size ) - strm.total_out );
+
+		/* Inflate another chunk. */
+		int err = inflate( &strm, Z_SYNC_FLUSH );
+		if ( err == Z_STREAM_END )
+			done = true;
+		else if ( err != Z_OK )
+			break; // damaged or truncated stream: keep what was decoded so far
+	}
+
+	if ( inflateEnd(&strm) != Z_OK )
+	{
+		free( raw_data );
+		return false;
+	}
+
+	m_begin = raw_data;
+	m_size  = static_cast<long>( strm.total_out );
+	return true;
+}
+
+#endif /* HAVE_ZLIB_H */
+
+
+// Callback_Reader
+
+Callback_Reader::Callback_Reader( callback_t c, long size, void* d ) :
+	callback( c ),
+	data( d )
+{
+	remain_ = max( 0l, size );
+}
+
+long Callback_Reader::remain() const { return remain_; }
+
+long Callback_Reader::read_avail( void* out, long count )
+{ // Returns the (possibly clamped) count on success, -1 on any failure
+	if ( count > remain_ ) // clamp to what the reader believes is left
+		count = remain_;
+	if ( count < 0 || Callback_Reader::read( out, count ) ) // qualified call: always this class's read()
+		count = -1;
+	return count;
+}
+
+blargg_err_t Callback_Reader::read( void* out, long count )
+{
+	RETURN_VALIDITY_CHECK( count >= 0 );
+	if ( count > remain_ ) return eof_error; // request runs past the declared size
+	remain_ -= count; // charge the bytes up front so remain() tracks the stream position
+	return callback( data, out, (int) count );
+}
+
+// Std_File_Reader
+
+#if 0//[ZDOOM:unneeded]def HAVE_ZLIB_H
+
+static const char* get_gzip_eof( const char* path, long* eof )
+{
+	FILE* file = fopen( path, "rb" );
+	if ( !file )
+		return "Couldn't open file";
+
+	unsigned char buf [4];
+	bool found_eof = false;
+	if ( fread( buf, 2, 1, file ) > 0 && buf [0] == 0x1F && buf [1] == 0x8B )
+	{
+		fseek( file, -4, SEEK_END );
+		if ( fread( buf, 4, 1, file ) > 0 ) {
+			*eof = get_le32( buf );
+			found_eof = true;
+		}
+	}
+	if ( !found_eof )
+	{
+		fseek( file, 0, SEEK_END );
+		*eof = ftell( file );
+	}
+	const char* err = (ferror( file ) || feof( file )) ? "Couldn't get file size" : nullptr;
+	fclose( file );
+	return err;
+}
+#endif
+
+
+Std_File_Reader::Std_File_Reader() :
+	file_( nullptr )
+#if 0//[ZDOOM:unneeded]def HAVE_ZLIB_H
+	, size_( 0 )
+#endif
+{ }
+
+Std_File_Reader::~Std_File_Reader() { close(); }
+
+blargg_err_t Std_File_Reader::open( const char* path )
+{
+	close(); // release any file left open by an earlier open() so its handle is not leaked
+#if 0//[ZDOOM:unneeded]def HAVE_ZLIB_H
+	// zlib transparently handles uncompressed data if magic header
+	// not present but we still need to grab size
+	RETURN_ERR( get_gzip_eof( path, &size_ ) );
+	file_ = gzopen( path, "rb" );
+#else
+	file_ = fopen( path, "rb" );
+#endif
+	if ( !file_ )
+		return "Couldn't open file";
+	return nullptr;
+}
+
+long Std_File_Reader::size() const
+{ // Measures the file by seeking to its end and back; no check that file_ is open
+#if 0//[ZDOOM:unneeded]def HAVE_ZLIB_H
+	if ( file_ )
+		return size_; // Set for both compressed and uncompressed modes
+#endif
+	long pos = tell(); // remember where we were
+	fseek( (FILE*) file_, 0, SEEK_END );
+	long result = tell(); // offset of the end = size in bytes
+	fseek( (FILE*) file_, pos, SEEK_SET ); // restore the position; fseek results are not checked
+	return result;
+}
+
+long Std_File_Reader::read_avail( void* p, long s )
+{
+#if 0//[ZDOOM:unneeded]def HAVE_ZLIB_H
+	if ( file_ && s > 0 && s <= UINT_MAX ) {
+		return gzread( reinterpret_cast<gzFile>(file_),
+			p, static_cast<unsigned>(s) );
+	}
+	return 0l;
+#else
+	const size_t readLength = static_cast<size_t>( max( 0l, s ) );
+	const auto result = fread( p, 1, readLength, reinterpret_cast<FILE*>(file_) );
+	return static_cast<long>( result );
+#endif /* HAVE_ZLIB_H */
+}
+
+blargg_err_t Std_File_Reader::read( void* p, long s )
+{
+	RETURN_VALIDITY_CHECK( s > 0 && s <= UINT_MAX );
+#if 0//[ZDOOM:unneeded]def HAVE_ZLIB_H
+	if ( file_ )
+	{
+		const auto &gzfile = reinterpret_cast<gzFile>( file_ );
+		if ( s == gzread( gzfile, p, static_cast<unsigned>( s ) ) )
+			return nullptr;
+		if ( gzeof( gzfile ) )
+			return eof_error;
+		return "Couldn't read from GZ file";
+	}
+#endif
+	const auto &file = reinterpret_cast<FILE*>( file_ );
+	if ( s == static_cast<long>( fread( p, 1, static_cast<size_t>(s), file ) ) )
+		return 0;
+	if ( feof( file ) )
+		return eof_error;
+	return "Couldn't read from file";
+}
+
+long Std_File_Reader::tell() const
+{
+#if 0//[ZDOOM:unneeded]def HAVE_ZLIB_H
+	if ( file_ )
+		return gztell( reinterpret_cast<gzFile>( file_ ) );
+#endif
+	return ftell( reinterpret_cast<FILE*>( file_ ) );
+}
+
+blargg_err_t Std_File_Reader::seek( long n )
+{ // Moves to absolute offset n; returns nullptr on success, otherwise an error string
+#if 0//[ZDOOM:unneeded]def HAVE_ZLIB_H
+	if ( file_ )
+	{
+		if ( gzseek( reinterpret_cast<gzFile>( file_ ), n, SEEK_SET ) >= 0 )
+			return nullptr;
+		if ( n > size_ )
+			return eof_error;
+		return "Error seeking in GZ file";
+	}
+#endif
+	if ( !fseek( reinterpret_cast<FILE*>( file_ ), n, SEEK_SET ) ) // fseek returns 0 on success
+		return nullptr;
+	if ( n > size() ) // failed seek past the end is reported as end-of-file
+		return eof_error;
+	return "Error seeking in file";
+}
+
+void Std_File_Reader::close()
+{
+	if ( file_ )
+	{
+#if 0//[ZDOOM:unneeded]def HAVE_ZLIB_H
+		gzclose( reinterpret_cast<gzFile>( file_ ) );
+#else
+		fclose( reinterpret_cast<FILE*>( file_ ) );
+#endif
+		file_ = nullptr;
+	}
+}
+
diff --git a/libraries/game-music-emu/gme/Data_Reader.h b/libraries/game-music-emu/gme/Data_Reader.h
new file mode 100644
index 000000000..59357767e
--- /dev/null
+++ b/libraries/game-music-emu/gme/Data_Reader.h
@@ -0,0 +1,149 @@
+// Data reader interface for uniform access
+
+// File_Extractor 0.4.0
+#ifndef DATA_READER_H
+#define DATA_READER_H
+
+#include "blargg_common.h"
+
+#ifdef HAVE_ZLIB_H
+#include <zlib.h>
+#endif
+
+// Supports reading and finding out how many bytes are remaining
+class Data_Reader {
+public:
+	virtual ~Data_Reader() { }
+	
+	static const char eof_error []; // returned by read() when request goes beyond end
+	
+	// Read at most count bytes and return number actually read, or <= 0 if error
+	virtual long read_avail( void*, long n ) = 0;
+	
+	// Read exactly count bytes and return error if they couldn't be read
+	virtual blargg_err_t read( void*, long count );
+	
+	// Number of bytes remaining until end of file
+	virtual long remain() const = 0;
+	
+	// Read and discard count bytes
+	virtual blargg_err_t skip( long count );
+	
+public:
+	Data_Reader() { }
+	typedef blargg_err_t error_t; // deprecated
+private:
+	// noncopyable
+	Data_Reader( const Data_Reader& );
+	Data_Reader& operator = ( const Data_Reader& );
+};
+
+// Supports seeking in addition to Data_Reader operations
+class File_Reader : public Data_Reader {
+public:
+	// Size of file
+	virtual long size() const = 0;
+	
+	// Current position in file
+	virtual long tell() const = 0;
+	
+	// Go to new position
+	virtual blargg_err_t seek( long ) = 0;
+	
+	long remain() const;
+	blargg_err_t skip( long n );
+};
+
+// Disk file reader
+class Std_File_Reader : public File_Reader {
+public:
+	blargg_err_t open( const char* path );
+	void close();
+	
+public:
+	Std_File_Reader();
+	~Std_File_Reader();
+	long size() const;
+	blargg_err_t read( void*, long );
+	long read_avail( void*, long );
+	long tell() const;
+	blargg_err_t seek( long );
+private:
+	void* file_; // Either FILE* or zlib's gzFile
+#if 0//[ZDOOM:unneeded] def HAVE_ZLIB_H
+	long size_; // TODO: Fix ABI compat
+#endif /* HAVE_ZLIB_H */
+};
+
+// Treats range of memory as a file
+class Mem_File_Reader : public File_Reader {
+public:
+	Mem_File_Reader( const void*, long size );
+#ifdef HAVE_ZLIB_H
+	~Mem_File_Reader( );
+#endif /* HAVE_ZLIB_H */
+
+public:
+	long size() const;
+	long read_avail( void*, long );
+	long tell() const;
+	blargg_err_t seek( long );
+private:
+#ifdef HAVE_ZLIB_H
+	bool gz_decompress();
+#endif /* HAVE_ZLIB_H */
+
+	const char* m_begin;
+	long m_size;
+	long m_pos;
+#ifdef HAVE_ZLIB_H
+	bool m_ownedPtr = false; // set if we must free m_begin
+#endif /* HAVE_ZLIB_H */
+};
+
+
+// Makes it look like there are only count bytes remaining
+class Subset_Reader : public Data_Reader {
+public:
+	Subset_Reader( Data_Reader*, long count );
+
+public:
+	long remain() const;
+	long read_avail( void*, long );
+private:
+	Data_Reader* in;
+	long remain_;
+};
+
+// Joins already-read header and remaining data into original file (to avoid seeking)
+class Remaining_Reader : public Data_Reader {
+public:
+	Remaining_Reader( void const* header, long size, Data_Reader* );
+
+public:
+	long remain() const;
+	long read_avail( void*, long );
+	blargg_err_t read( void*, long );
+private:
+	char const* header;
+	char const* header_end;
+	Data_Reader* in;
+	long read_first( void* out, long count );
+};
+
+// Invokes callback function to read data. Size of data must be specified in advance.
+class Callback_Reader : public Data_Reader {
+public:
+	typedef const char* (*callback_t)( void* data, void* out, int count );
+	Callback_Reader( callback_t, long size, void* data = 0 );
+public:
+	long read_avail( void*, long );
+	blargg_err_t read( void*, long );
+	long remain() const;
+private:
+	callback_t const callback;
+	void* const data;
+	long remain_;
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Dual_Resampler.cpp b/libraries/game-music-emu/gme/Dual_Resampler.cpp
new file mode 100644
index 000000000..e774d85f8
--- /dev/null
+++ b/libraries/game-music-emu/gme/Dual_Resampler.cpp
@@ -0,0 +1,141 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Dual_Resampler.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+unsigned const resampler_extra = 256; // NOTE(review): not referenced anywhere else in Dual_Resampler.cpp; possibly a leftover
+
+Dual_Resampler::Dual_Resampler()
+{	// placeholder values; reset() and resize() assign the working ones
+	sample_buf_size       = 0;
+	oversamples_per_frame = -1;
+	buf_pos               = -1;
+	resampler_size        = 0;
+}
+
+Dual_Resampler::~Dual_Resampler() { } // empty body: members are destroyed implicitly
+
+blargg_err_t Dual_Resampler::reset( int pairs )
+{
+	int const padded_pairs = pairs + (pairs >> 2); // expand allocations a bit (a quarter extra)
+	RETURN_ERR( sample_buf.resize( padded_pairs * 2 ) );
+	resize( pairs );
+	resampler_size = oversamples_per_frame + (oversamples_per_frame >> 2);
+	return resampler.buffer_size( resampler_size );
+}
+
+void Dual_Resampler::resize( int pairs )
+{
+	int const wanted = pairs * 2; // samples per frame for the requested pair count
+	if ( sample_buf_size == wanted )
+		return; // frame size unchanged: keep buffered data and settings
+	
+	if ( (unsigned) wanted > sample_buf.size() )
+	{
+		check( false ); // larger than what reset() allocated
+		return;
+	}
+	sample_buf_size = wanted;
+	oversamples_per_frame = int (pairs * resampler.ratio()) * 2 + 2;
+	clear();
+}
+
+void Dual_Resampler::play_frame_( Blip_Buffer& blip_buf, dsample_t* out )
+{ // Renders one frame of sample_buf_size samples into 'out': subclass PCM -> resampler -> mixed with blip_buf.
+	long pair_count = sample_buf_size >> 1; // stereo pairs per frame
+	blip_time_t blip_time = blip_buf.count_clocks( pair_count );
+	int sample_count = oversamples_per_frame - resampler.written(); // per-frame input minus what is already buffered
+	// have the subclass render PCM straight into the resampler's input buffer
+	int new_count = play_frame( blip_time, sample_count, resampler.buffer() );
+	assert( new_count < resampler_size );
+	// close the Blip_Buffer frame; it must now hold exactly one frame of pairs
+	blip_buf.end_frame( blip_time );
+	assert( blip_buf.samples_avail() == pair_count );
+	// commit what play_frame() wrote
+	resampler.write( new_count );
+	// pull one frame of resampled output into sample_buf
+#ifdef	NDEBUG // Avoid warning when asserts are disabled
+	resampler.read( sample_buf.begin(), sample_buf_size );
+#else
+	long count = resampler.read( sample_buf.begin(), sample_buf_size );
+	assert( count == (long) sample_buf_size );
+#endif
+	// combine the resampled PCM in sample_buf with the Blip_Buffer output
+	mix_samples( blip_buf, out );
+	blip_buf.remove_samples( pair_count );
+}
+
+void Dual_Resampler::dual_play( long count, dsample_t* out, Blip_Buffer& blip_buf )
+{
+	// Writes 'count' samples to 'out' in three stages: leftovers from the
+	// previous call, whole frames, then the start of one extra frame.
+	// stage 1: samples still waiting in sample_buf
+	long leftover = sample_buf_size - buf_pos;
+	if ( leftover )
+	{
+		if ( leftover > count )
+			leftover = count;
+		memcpy( out, &sample_buf [buf_pos], leftover * sizeof *out );
+		count   -= leftover;
+		out     += leftover;
+		buf_pos += leftover;
+	}
+	
+	// stage 2: whole frames are rendered directly into the caller's buffer
+	for ( ; count >= (long) sample_buf_size; count -= sample_buf_size )
+	{
+		play_frame_( blip_buf, out );
+		out += sample_buf_size;
+	}
+	
+	// stage 3: render one more frame into sample_buf and hand out its first part
+	if ( count )
+	{
+		play_frame_( blip_buf, sample_buf.begin() );
+		buf_pos = count;
+		memcpy( out, sample_buf.begin(), count * sizeof *out );
+	}
+}
+
+void Dual_Resampler::mix_samples( Blip_Buffer& blip_buf, dsample_t* out )
+{ // Mixes one frame: each Blip_Buffer sample is added to both sides of the matching pair in sample_buf.
+	Blip_Reader sn;
+	int bass = sn.begin( blip_buf );
+	const dsample_t* in = sample_buf.begin(); // interleaved left/right samples filled by play_frame_()
+	
+	for ( int n = sample_buf_size >> 1; n--; )
+	{
+		int s = sn.read(); // one Blip_Buffer sample, shared by left and right
+		blargg_long l = (blargg_long) in [0] * 2 + s; // resampled sample is doubled (setup() halves the resampler gain)
+		if ( (int16_t) l != l )
+			l = 0x7FFF - (l >> 24); // does not fit in 16 bits: clamp
+		
+		sn.next( bass );
+		blargg_long r = (blargg_long) in [1] * 2 + s;
+		if ( (int16_t) r != r )
+			r = 0x7FFF - (r >> 24);
+		
+		in += 2;
+		out [0] = l;
+		out [1] = r;
+		out += 2;
+	}
+	
+	sn.end( blip_buf );
+}
+
diff --git a/libraries/game-music-emu/gme/Dual_Resampler.h b/libraries/game-music-emu/gme/Dual_Resampler.h
new file mode 100644
index 000000000..512fd97d0
--- /dev/null
+++ b/libraries/game-music-emu/gme/Dual_Resampler.h
@@ -0,0 +1,50 @@
+// Combination of Fir_Resampler and Blip_Buffer mixing. Used by Sega FM emulators.
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef DUAL_RESAMPLER_H
+#define DUAL_RESAMPLER_H
+
+#include "Fir_Resampler.h"
+#include "Blip_Buffer.h"
+
+class Dual_Resampler {
+public:
+	Dual_Resampler();
+	virtual ~Dual_Resampler();
+	
+	typedef short dsample_t;
+	
+	double setup( double oversample, double rolloff, double gain ); // forwards to the resampler with half the gain; returns its ratio
+	blargg_err_t reset( int max_pairs ); // allocates buffers for up to max_pairs stereo pairs per frame (plus a margin)
+	void resize( int pairs_per_frame ); // changes the frame size; must fit what reset() allocated
+	void clear(); // marks the leftover buffer empty and clears the resampler
+	
+	void dual_play( long count, dsample_t* out, Blip_Buffer& ); // writes count samples to out
+	
+protected:
+	virtual int play_frame( blip_time_t, int pcm_count, dsample_t* pcm_out ) = 0; // supplied by the subclass; result is passed to resampler.write()
+private:
+	
+	blargg_vector<dsample_t> sample_buf; // one frame of resampled output
+	int sample_buf_size; // samples per frame (pairs * 2)
+	int oversamples_per_frame; // resampler input per frame, computed in resize()
+	int buf_pos; // next unread sample in sample_buf; equals sample_buf_size when empty
+	int resampler_size; // size passed to resampler.buffer_size() in reset()
+	
+	Fir_Resampler<12> resampler;
+	void mix_samples( Blip_Buffer&, dsample_t* );
+	void play_frame_( Blip_Buffer&, dsample_t* );
+};
+
+inline double Dual_Resampler::setup( double oversample, double rolloff, double gain )
+{	// the resampler runs at half gain; mix_samples() doubles its output again
+	return resampler.time_ratio( oversample, rolloff, 0.5 * gain );
+}
+
+inline void Dual_Resampler::clear()
+{	// empties the leftover buffer and resets the resampler (independent steps)
+	resampler.clear();
+	buf_pos = sample_buf_size;
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Effects_Buffer.cpp b/libraries/game-music-emu/gme/Effects_Buffer.cpp
new file mode 100644
index 000000000..56b0c5b5c
--- /dev/null
+++ b/libraries/game-music-emu/gme/Effects_Buffer.cpp
@@ -0,0 +1,595 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Effects_Buffer.h"
+
+#include <string.h>
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+#ifdef BLARGG_ENABLE_OPTIMIZER
+	#include BLARGG_ENABLE_OPTIMIZER
+#endif
+
+typedef blargg_long fixed_t; // fixed-point value with 15 fractional bits
+
+#define TO_FIXED( f )   fixed_t ((f) * (1L << 15) + 0.5) // float -> fixed; 0.5 is added before truncation
+#define FMUL( x, y )    (((x) * (y)) >> 15) // product with a fixed-point factor, shifted back down
+
+const unsigned echo_size = 4096; // samples in each echo delay line
+const unsigned echo_mask = echo_size - 1; // wraps an index into an echo line
+BOOST_STATIC_ASSERT( (echo_size & echo_mask) == 0 ); // must be power of 2
+
+const unsigned reverb_size = 8192 * 2; // samples in each reverb delay line (written as left/right pairs)
+const unsigned reverb_mask = reverb_size - 1; // wraps an index into a reverb line
+BOOST_STATIC_ASSERT( (reverb_size & reverb_mask) == 0 ); // must be power of 2
+
+Effects_Buffer::config_t::config_t() : // defaults, listed in member declaration order
+	pan_1( -0.15f ),
+	pan_2( 0.15f ),
+	echo_delay( 61.0f ),     // msec
+	echo_level( 0.10f ),
+	reverb_delay( 88.0f ),   // msec
+	delay_variance( 18.0f ), // msec
+	reverb_level( 0.12f ),
+	effects_enabled( false )
+{
+}
+
+void Effects_Buffer::set_depth( double d )
+{	// builds a config_t from a single depth value and applies it
+	float depth = (float) d;
+	config_t cfg;
+	cfg.effects_enabled = (d > 0.0f); // any positive depth switches the effects on
+	cfg.pan_1           = -0.6f * depth;
+	cfg.pan_2           =  0.6f * depth;
+	cfg.reverb_delay    = 880 * 0.1f;
+	cfg.echo_delay      = 610 * 0.1f;
+	cfg.delay_variance  = 180 * 0.1f;
+	if ( depth > 0.5 )
+		depth = 0.5; // TODO: more linear reduction of extreme reverb/echo
+	cfg.reverb_level    = 0.5f * depth;
+	cfg.echo_level      = 0.30f * depth;
+	config( cfg );
+}
+
+Effects_Buffer::Effects_Buffer( int num_voices, bool center_only )
+	: Multi_Buffer( 2*num_voices ) // NOTE(review): presumably the number of samples per output frame; confirm in Multi_Buffer.h
+	, max_voices(num_voices)
+	, bufs(max_voices * (center_only ? (max_buf_count - 4) : max_buf_count)) // 3 buffers per voice when center_only, otherwise 7
+	, chan_types(max_voices * chan_types_count)
+	, stereo_remain(0)
+	, effect_remain(0)
+	// TODO: Reorder buf_count to be initialized before bufs to factor out channel sizing
+	, buf_count(max_voices * (center_only ? (max_buf_count - 4) : max_buf_count)) // same expression as the size of bufs
+	, effects_enabled(false)
+	, reverb_buf(max_voices, std::vector<blip_sample_t>(reverb_size)) // one zero-filled delay line per voice
+	, echo_buf(max_voices, std::vector<blip_sample_t>(echo_size)) // one zero-filled delay line per voice
+	, reverb_pos(max_voices) // per-voice positions, all starting at 0
+	, echo_pos(max_voices)
+{
+	set_depth( 0 ); // installs the initial configuration with effects disabled
+}
+
+Effects_Buffer::~Effects_Buffer()
+{} // empty body: the vector members are destroyed implicitly
+
+blargg_err_t Effects_Buffer::set_sample_rate( long rate, int msec )
+{
+	// Allocate any echo/reverb delay line that is still empty; reports "Out of memory" on failure.
+	try
+	{
+		for ( int voice = 0; voice < max_voices; voice++ )
+		{
+			std::vector<blip_sample_t>& echo   = echo_buf [voice];
+			std::vector<blip_sample_t>& reverb = reverb_buf [voice];
+			if ( echo.empty() )
+				echo.resize( echo_size );
+			if ( reverb.empty() )
+				reverb.resize( reverb_size );
+		}
+	}
+	catch ( std::bad_alloc& )
+	{
+		return "Out of memory";
+	}
+
+	// Every Blip_Buffer gets the same rate and length; the first failure is returned.
+	for ( int idx = 0; idx < buf_count; idx++ )
+		RETURN_ERR( bufs [idx].set_sample_rate( rate, msec ) );
+	
+	// NOTE(review): config() reads sample_rate(), but Multi_Buffer::set_sample_rate()
+	// only runs in the return statement below; confirm the delays use the new rate.
+	config( config_ );
+	clear();
+	return Multi_Buffer::set_sample_rate( bufs [0].sample_rate(), bufs [0].length() );
+}
+
+void Effects_Buffer::clock_rate( long rate )
+{	// every underlying Blip_Buffer gets the same clock rate
+	for ( int idx = 0; idx < buf_count; ++idx )
+		bufs [idx].clock_rate( rate );
+}
+
+void Effects_Buffer::bass_freq( int freq )
+{	// every underlying Blip_Buffer gets the same bass frequency
+	for ( int idx = 0; idx < buf_count; ++idx )
+		bufs [idx].bass_freq( freq );
+}
+
+void Effects_Buffer::clear()
+{
+	// Drop pending stereo/effect tails, silence the delay lines, clear each Blip_Buffer.
+	stereo_remain = effect_remain = 0;
+	for ( int voice = 0; voice < max_voices; voice++ )
+	{
+		std::vector<blip_sample_t>& echo   = echo_buf [voice];
+		std::vector<blip_sample_t>& reverb = reverb_buf [voice];
+		if ( !echo.empty() )
+			memset( &echo [0], 0, echo_size * sizeof echo [0] );
+		if ( !reverb.empty() )
+			memset( &reverb [0], 0, reverb_size * sizeof reverb [0] );
+	}
+
+	for ( int idx = 0; idx < buf_count; idx++ )
+		bufs [idx].clear();
+}
+
+inline int pin_range( int n, int max, int min = 0 )
+{
+	// Clamps n to the range [min, max]. The lower bound is checked first,
+	// so min is returned whenever n < min, even if min happens to exceed max.
+	if ( n >= min )
+		return (n > max) ? max : n;
+	return min;
+}
+
+void Effects_Buffer::config( const config_t& cfg )
+{
+	// Applies a new configuration: converts effect levels and delays to the
+	// fixed-point/sample units used by the mixers, then rebuilds the table
+	// mapping each voice's channel types onto its Blip_Buffers.
+	// Buffers are indexed with a per-voice stride, matching end_frame() and
+	// read_samples(). The stride used to be max_buf_count unconditionally and
+	// center_only was detected with "buf_count < max_buf_count", which indexed
+	// past the end of bufs for multi-voice center_only objects.
+	channels_changed();
+	
+	// clear echo and reverb buffers
+	// ensure the echo/reverb buffers have already been allocated, so this method can be
+	// called before set_sample_rate is called
+	if ( !config_.effects_enabled && cfg.effects_enabled && echo_buf[0].size() )
+	{
+		for(int i=0; i<max_voices; i++)
+		{
+			memset( &echo_buf[i][0], 0, echo_size * sizeof echo_buf[i][0] );
+			memset( &reverb_buf[i][0], 0, reverb_size * sizeof reverb_buf[i][0] );
+		}
+	}
+
+	config_ = cfg;
+	
+	if ( config_.effects_enabled )
+	{
+		// convert to internal format
+		
+		chans.pan_1_levels [0] = TO_FIXED( 1 ) - TO_FIXED( config_.pan_1 );
+		chans.pan_1_levels [1] = TO_FIXED( 2 ) - chans.pan_1_levels [0];
+		
+		chans.pan_2_levels [0] = TO_FIXED( 1 ) - TO_FIXED( config_.pan_2 );
+		chans.pan_2_levels [1] = TO_FIXED( 2 ) - chans.pan_2_levels [0];
+		
+		chans.reverb_level = TO_FIXED( config_.reverb_level );
+		chans.echo_level = TO_FIXED( config_.echo_level );
+		
+		int delay_offset = int (1.0 / 2000 * config_.delay_variance * sample_rate());
+		
+		int reverb_sample_delay = int (1.0 / 1000 * config_.reverb_delay * sample_rate());
+		chans.reverb_delay_l = pin_range( reverb_size -
+				(reverb_sample_delay - delay_offset) * 2, reverb_size - 2, 0 );
+		chans.reverb_delay_r = pin_range( reverb_size + 1 -
+				(reverb_sample_delay + delay_offset) * 2, reverb_size - 1, 1 );
+		
+		int echo_sample_delay = int (1.0 / 1000 * config_.echo_delay * sample_rate());
+		chans.echo_delay_l = pin_range( echo_size - 1 - (echo_sample_delay - delay_offset),
+				echo_size - 1 );
+		chans.echo_delay_r = pin_range( echo_size - 1 - (echo_sample_delay + delay_offset),
+				echo_size - 1 );
+	}
+	
+	// center_only objects own fewer than max_buf_count buffers per voice
+	bool const center_only = buf_count < max_voices * max_buf_count;
+	int const stride = center_only ? (max_buf_count - 4) : max_buf_count;
+	assert( 2 < chan_types_count ); // channel types 0..2 are configured below
+	for ( int i = 0; i < max_voices; i++ )
+	{
+		int const base = i * stride; // first buffer owned by this voice
+		for ( int j = 0; j < chan_types_count; j++ )
+		{
+			channel_t& c = chan_types [i*chan_types_count+j];
+			
+			// with effects each channel type has its own center buffer;
+			// without them every type shares the voice's first buffer
+			c.center = &bufs [base + (config_.effects_enabled ? j : 0)];
+			
+			if ( center_only )
+			{
+				// no side buffers exist, so everything goes to the center
+				c.left  = c.center;
+				c.right = c.center;
+			}
+			else if ( config_.effects_enabled )
+			{
+				// types 0 and 1 share side buffers 3/4, type 2 uses 5/6
+				int const side = (j < 2) ? 3 : 5;
+				c.left  = &bufs [base + side];
+				c.right = &bufs [base + side + 1];
+			}
+			else
+			{
+				c.left  = &bufs [base + 1];
+				c.right = &bufs [base + 2];
+			}
+		}
+	}
+}
+
+Effects_Buffer::channel_t Effects_Buffer::channel( int i, int type )
+{
+	// Chooses which of a voice's chan_types_count channel types serves channel i.
+	int const last = chan_types_count - 1;
+	int slot = last; // default: the last type
+	if ( type == 0 )
+	{
+		// untyped channel: slot follows the index modulo 5, capped at the last type
+		int const by_index = i % 5;
+		slot = (by_index > last) ? last : by_index;
+	}
+	else if ( (type & noise_type) == 0 && (type & type_index_mask) % 3 != 0 )
+		slot = type & 1;
+	return chan_types [(i % max_voices)*chan_types_count + slot];
+}
+	
+void Effects_Buffer::end_frame( blip_time_t clock_count )
+{
+	int bufs_used = 0; // bit i set = buffer i of the current voice was modified this frame
+	int stereo_mask = (config_.effects_enabled ? 0x78 : 0x06); // side buffers: 3..6 with effects, 1..2 without (see config())
+	// Ends the frame on every buffer and records how long stereo/effects mixing stays necessary.
+	const int buf_count_per_voice = buf_count/max_voices;
+	for ( int v = 0; v < max_voices; v++ ) // foreach voice
+	{
+		for ( int i = 0; i < buf_count_per_voice; i++) // foreach buffer of that voice
+		{
+			bufs_used |= bufs [v*buf_count_per_voice + i].clear_modified() << i;
+			bufs [v*buf_count_per_voice + i].end_frame( clock_count );
+			// stereo mixing is only possible when every voice has the full max_buf_count buffers
+			if ( (bufs_used & stereo_mask) && buf_count == max_voices*max_buf_count )
+				stereo_remain = max(stereo_remain, bufs [v*buf_count_per_voice + i].samples_avail() + bufs [v*buf_count_per_voice + i].output_latency());
+			if ( effects_enabled || config_.effects_enabled ) // enabled now, or still enabled at the previous end_frame()
+				effect_remain = max(effect_remain, bufs [v*buf_count_per_voice + i].samples_avail() + bufs [v*buf_count_per_voice + i].output_latency());
+		}
+		bufs_used = 0; // the modified flags are tracked per voice
+	}
+	
+	effects_enabled = config_.effects_enabled;
+}
+
+long Effects_Buffer::samples_avail() const
+{	// NOTE(review): always scales by 2, while read_samples() delivers max_voices * 2 samples per frame; confirm for multi-voice use
+	return 2 * bufs [0].samples_avail();
+}
+
+long Effects_Buffer::read_samples( blip_sample_t* out, long total_samples )
+{
+	const int n_channels = max_voices * 2; // samples per output frame: one left/right pair per voice
+	const int buf_count_per_voice = buf_count/max_voices;
+
+	require( total_samples % n_channels == 0 ); // as many items needed to fill at least one frame
+	// from here on 'remain' and 'total_samples' count frames, limited by what bufs [0] holds
+	long remain = bufs [0].samples_avail();
+	total_samples = remain = min( remain, total_samples/n_channels );
+
+	while ( remain )
+	{
+		int active_bufs = buf_count_per_voice; // buffers per voice the chosen mixer reads
+		long count = remain;
+		
+		// optimizing mixing to skip any channels which had nothing added
+		if ( effect_remain )
+		{
+			if ( count > effect_remain )
+				count = effect_remain; // stop where the effect tail ends so a cheaper mixer can take over
+			
+			if ( stereo_remain )
+			{
+				mix_enhanced( out, count );
+			}
+			else
+			{
+				mix_mono_enhanced( out, count );
+				active_bufs = 3;
+			}
+		}
+		else if ( stereo_remain )
+		{
+			mix_stereo( out, count );
+			active_bufs = 3; 
+		}
+		else
+		{
+			mix_mono( out, count );
+			active_bufs = 1; 
+		}
+		
+		out += count * n_channels;
+		remain -= count;
+		// count both tails down, never below zero
+		stereo_remain -= count;
+		if ( stereo_remain < 0 )
+			stereo_remain = 0;
+		
+		effect_remain -= count;
+		if ( effect_remain < 0 )
+			effect_remain = 0;
+		
+		// skip the output from any buffers that didn't contribute to the sound output
+		// during this frame (e.g. if we only render mono then only the very first buf
+		// is 'active')
+		for ( int v = 0; v < max_voices; v++ ) // foreach voice
+		{
+			for ( int i = 0; i < buf_count_per_voice; i++) // foreach buffer of that voice
+			{
+				if ( i < active_bufs )
+					bufs [v*buf_count_per_voice + i].remove_samples( count );
+				else // keep time synchronized
+					bufs [v*buf_count_per_voice + i].remove_silence( count );
+			}
+		}
+	}
+	// frames actually mixed, converted back to a sample count
+	return total_samples * n_channels;
+}
+
+void Effects_Buffer::mix_mono( blip_sample_t* out_, blargg_long count )
+{
+	// Plain mix used when neither effects nor stereo output are pending: for
+	// each voice, 'count' samples are read from the voice's first buffer,
+	// clamped to 16 bits and stored as both the left and the right sample of
+	// that voice's pair. Output frames are interleaved: one frame holds
+	// max_voices stereo pairs and voice i owns entries i*2 and i*2+1.
+	//
+	// Fixes relative to the earlier unrolled version:
+	//  - It stored two frames per iteration through a uint32_t* cast and put
+	//    the second frame directly behind the first pair, which is only the
+	//    right place when max_voices == 1. Every frame is now addressed as
+	//    frame * (max_voices*2) + i*2.
+	//  - The cast ignored the alignment and aliasing rules for int16 storage
+	//    and shifted a promoted uint16_t into the sign bit; samples are now
+	//    stored one at a time.
+	//  - Voices are located with the per-voice buffer count, the same way
+	//    end_frame() and read_samples() index bufs, instead of max_buf_count
+	//    (the two only differ for center_only objects).
+	int const frame_size = max_voices * 2;
+	for ( int i = 0; i < max_voices; i++ )
+	{
+		Blip_Buffer& center = bufs [i * (buf_count / max_voices)];
+		blip_sample_t* BLIP_RESTRICT out = out_ + i * 2;
+		int const bass = BLIP_READER_BASS( center );
+		BLIP_READER_BEGIN( c, center );
+		
+		for ( blargg_long n = count; n; --n )
+		{
+			blargg_long s = BLIP_READER_READ( c );
+			BLIP_READER_NEXT( c, bass );
+			
+			// clamp to the 16-bit sample range
+			if ( (int16_t) s != s )
+				s = 0x7FFF - (s >> 24);
+			
+			out [0] = (blip_sample_t) s;
+			out [1] = (blip_sample_t) s;
+			out += frame_size;
+		}
+		
+		BLIP_READER_END( c, center );
+	}
+}
+
+void Effects_Buffer::mix_stereo( blip_sample_t* out_, blargg_long frames )
+{ // Stereo mix without effects: per voice, buffer 0 (center) is added to buffer 1 (left) and buffer 2 (right).
+    for(int i=0; i<max_voices; i++)
+    {
+	blip_sample_t* BLIP_RESTRICT out = out_;
+	int const bass = BLIP_READER_BASS( bufs [i*max_buf_count+0] );
+	BLIP_READER_BEGIN( c, bufs [i*max_buf_count+0] );
+	BLIP_READER_BEGIN( l, bufs [i*max_buf_count+1] );
+	BLIP_READER_BEGIN( r, bufs [i*max_buf_count+2] );
+
+	int count = frames;
+	while ( count-- )
+	{
+		int cs = BLIP_READER_READ( c );
+		BLIP_READER_NEXT( c, bass );
+		int left = cs + BLIP_READER_READ( l );
+		int right = cs + BLIP_READER_READ( r );
+		BLIP_READER_NEXT( l, bass );
+		BLIP_READER_NEXT( r, bass );
+		// clamp both sides to the 16-bit sample range
+		if ( (int16_t) left != left )
+			left = 0x7FFF - (left >> 24);
+		
+		if ( (int16_t) right != right )
+			right = 0x7FFF - (right >> 24);
+
+		out [i*2+0] = left; // voice i owns entries i*2 and i*2+1 of each frame
+		out [i*2+1] = right;
+		
+		out += max_voices*2; // advance one whole frame (a stereo pair per voice)
+		
+	}
+	
+	BLIP_READER_END( r, bufs [i*max_buf_count+2] );
+	BLIP_READER_END( l, bufs [i*max_buf_count+1] );
+	BLIP_READER_END( c, bufs [i*max_buf_count+0] );
+    }
+}
+
+void Effects_Buffer::mix_mono_enhanced( blip_sample_t* out_, blargg_long frames )
+{
+	// Effects mix for frames where no side (left/right) buffer has pending
+	// output: buffers 0 and 1 of each voice are panned and fed through the
+	// reverb line, buffer 2 is the center signal that also feeds the echo line.
+	// Voices are located with the per-voice buffer count, the way end_frame()
+	// and read_samples() index bufs; the former i*max_buf_count stride pointed
+	// at the wrong buffers for multi-voice center_only objects.
+	for ( int i = 0; i < max_voices; i++ )
+	{
+		Blip_Buffer* const voice_bufs = &bufs [i * (buf_count / max_voices)];
+		blip_sample_t* BLIP_RESTRICT out = out_;
+		int const bass = BLIP_READER_BASS( voice_bufs [2] );
+		BLIP_READER_BEGIN( center, voice_bufs [2] );
+		BLIP_READER_BEGIN( sq1, voice_bufs [0] );
+		BLIP_READER_BEGIN( sq2, voice_bufs [1] );
+		// work on local copies of this voice's delay-line state
+		blip_sample_t* const reverb_buf = &this->reverb_buf[i][0];
+		blip_sample_t* const echo_buf = &this->echo_buf[i][0];
+		int echo_pos = this->echo_pos[i];
+		int reverb_pos = this->reverb_pos[i];
+		int count = frames;
+		while ( count-- )
+		{
+			int sum1_s = BLIP_READER_READ( sq1 );
+			int sum2_s = BLIP_READER_READ( sq2 );
+			BLIP_READER_NEXT( sq1, bass );
+			BLIP_READER_NEXT( sq2, bass );
+			// panned input plus the delayed reverb signal
+			int new_reverb_l = FMUL( sum1_s, chans.pan_1_levels [0] ) +
+					FMUL( sum2_s, chans.pan_2_levels [0] ) +
+					reverb_buf [(reverb_pos + chans.reverb_delay_l) & reverb_mask];
+			int new_reverb_r = FMUL( sum1_s, chans.pan_1_levels [1] ) +
+					FMUL( sum2_s, chans.pan_2_levels [1] ) +
+					reverb_buf [(reverb_pos + chans.reverb_delay_r) & reverb_mask];
+			// feed the attenuated result back into the reverb line (left, right)
+			fixed_t reverb_level = chans.reverb_level;
+			reverb_buf [reverb_pos] = (blip_sample_t) FMUL( new_reverb_l, reverb_level );
+			reverb_buf [reverb_pos + 1] = (blip_sample_t) FMUL( new_reverb_r, reverb_level );
+			reverb_pos = (reverb_pos + 2) & reverb_mask;
+			int sum3_s = BLIP_READER_READ( center );
+			BLIP_READER_NEXT( center, bass );
+			int left = new_reverb_l + sum3_s + FMUL( chans.echo_level,
+					echo_buf [(echo_pos + chans.echo_delay_l) & echo_mask] );
+			int right = new_reverb_r + sum3_s + FMUL( chans.echo_level,
+					echo_buf [(echo_pos + chans.echo_delay_r) & echo_mask] );
+			echo_buf [echo_pos] = sum3_s;
+			echo_pos = (echo_pos + 1) & echo_mask;
+			// clamp to the 16-bit sample range
+			if ( (int16_t) left != left )
+				left = 0x7FFF - (left >> 24);
+			if ( (int16_t) right != right )
+				right = 0x7FFF - (right >> 24);
+			
+			out [i*2+0] = left;
+			out [i*2+1] = right;
+			out += max_voices*2;
+		}
+		this->reverb_pos[i] = reverb_pos;
+		this->echo_pos[i] = echo_pos;
+		
+		BLIP_READER_END( sq1, voice_bufs [0] );
+		BLIP_READER_END( sq2, voice_bufs [1] );
+		BLIP_READER_END( center, voice_bufs [2] );
+	}
+}
+
+void Effects_Buffer::mix_enhanced( blip_sample_t* out_, blargg_long frames )
+{ // Full effects mix: like mix_mono_enhanced() but also adds side buffers 3/4 (into the reverb path) and 5/6 (into the output).
+    for(int i=0; i<max_voices; i++)
+    {
+	blip_sample_t* BLIP_RESTRICT out = out_;
+	int const bass = BLIP_READER_BASS( bufs [i*max_buf_count+2] );
+	BLIP_READER_BEGIN( center, bufs [i*max_buf_count+2] );
+	BLIP_READER_BEGIN( l1, bufs [i*max_buf_count+3] );
+	BLIP_READER_BEGIN( r1, bufs [i*max_buf_count+4] );
+	BLIP_READER_BEGIN( l2, bufs [i*max_buf_count+5] );
+	BLIP_READER_BEGIN( r2, bufs [i*max_buf_count+6] );
+	BLIP_READER_BEGIN( sq1, bufs [i*max_buf_count+0] );
+	BLIP_READER_BEGIN( sq2, bufs [i*max_buf_count+1] );
+	// work on local copies of this voice's delay-line state (the locals shadow the members)
+	blip_sample_t* const reverb_buf = &this->reverb_buf[i][0];
+	blip_sample_t* const echo_buf = &this->echo_buf[i][0];
+	int echo_pos = this->echo_pos[i];
+	int reverb_pos = this->reverb_pos[i];
+	
+	int count = frames;
+	while ( count-- )
+	{
+		int sum1_s = BLIP_READER_READ( sq1 );
+		int sum2_s = BLIP_READER_READ( sq2 );
+		
+		BLIP_READER_NEXT( sq1, bass );
+		BLIP_READER_NEXT( sq2, bass );
+		// panned buffers 0/1 + side buffer 3 or 4 + the delayed reverb signal
+		int new_reverb_l = FMUL( sum1_s, chans.pan_1_levels [0] ) +
+				FMUL( sum2_s, chans.pan_2_levels [0] ) + BLIP_READER_READ( l1 ) +
+				reverb_buf [(reverb_pos + chans.reverb_delay_l) & reverb_mask];
+		
+		int new_reverb_r = FMUL( sum1_s, chans.pan_1_levels [1] ) +
+				FMUL( sum2_s, chans.pan_2_levels [1] ) + BLIP_READER_READ( r1 ) +
+				reverb_buf [(reverb_pos + chans.reverb_delay_r) & reverb_mask];
+		
+		BLIP_READER_NEXT( l1, bass );
+		BLIP_READER_NEXT( r1, bass );
+		// feed the attenuated result back into the reverb line (left, then right)
+		fixed_t reverb_level = chans.reverb_level;
+		reverb_buf [reverb_pos] = (blip_sample_t) FMUL( new_reverb_l, reverb_level );
+		reverb_buf [reverb_pos + 1] = (blip_sample_t) FMUL( new_reverb_r, reverb_level );
+		reverb_pos = (reverb_pos + 2) & reverb_mask;
+		
+		int sum3_s = BLIP_READER_READ( center );
+		BLIP_READER_NEXT( center, bass );
+		// output = reverb path + center + side buffer 5 or 6 + scaled delayed echo
+		int left = new_reverb_l + sum3_s + BLIP_READER_READ( l2 ) + FMUL( chans.echo_level,
+				echo_buf [(echo_pos + chans.echo_delay_l) & echo_mask] );
+		int right = new_reverb_r + sum3_s + BLIP_READER_READ( r2 ) + FMUL( chans.echo_level,
+				echo_buf [(echo_pos + chans.echo_delay_r) & echo_mask] );
+		
+		BLIP_READER_NEXT( l2, bass );
+		BLIP_READER_NEXT( r2, bass );
+		// only the center signal is recorded in the echo line
+		echo_buf [echo_pos] = sum3_s;
+		echo_pos = (echo_pos + 1) & echo_mask;
+		// clamp both sides to the 16-bit sample range
+		if ( (int16_t) left != left )
+			left = 0x7FFF - (left >> 24);
+		
+		if ( (int16_t) right != right )
+			right = 0x7FFF - (right >> 24);
+
+		out [i*2+0] = left; // voice i owns entries i*2 and i*2+1 of each frame
+		out [i*2+1] = right;
+
+		out += max_voices*2; // advance one whole frame (a stereo pair per voice)
+	}
+	this->reverb_pos[i] = reverb_pos; // store the advanced positions for the next call
+	this->echo_pos[i] = echo_pos;
+	
+	BLIP_READER_END( l1, bufs [i*max_buf_count+3] );
+	BLIP_READER_END( r1, bufs [i*max_buf_count+4] );
+	BLIP_READER_END( l2, bufs [i*max_buf_count+5] );
+	BLIP_READER_END( r2, bufs [i*max_buf_count+6] );
+	BLIP_READER_END( sq1, bufs [i*max_buf_count+0] );
+	BLIP_READER_END( sq2, bufs [i*max_buf_count+1] );
+	BLIP_READER_END( center, bufs [i*max_buf_count+2] );
+    }
+}
+
diff --git a/libraries/game-music-emu/gme/Effects_Buffer.h b/libraries/game-music-emu/gme/Effects_Buffer.h
new file mode 100644
index 000000000..ec634d622
--- /dev/null
+++ b/libraries/game-music-emu/gme/Effects_Buffer.h
@@ -0,0 +1,90 @@
+// Multi-channel effects buffer with panning, echo and reverb
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef EFFECTS_BUFFER_H
+#define EFFECTS_BUFFER_H
+
+#include "Multi_Buffer.h"
+
+#include <vector>
+
+// Effects_Buffer uses several buffers and outputs stereo sample pairs.
+class Effects_Buffer : public Multi_Buffer {
+public:
+	// nVoices indicates the number of voices for which buffers will be allocated
+	// to make Effects_Buffer work as "mix everything to one", nVoices will be 1
+	// If center_only is true, only center buffers are created and
+	// less memory is used.
+	Effects_Buffer( int nVoices = 1, bool center_only = false );
+	
+	// Channel  Effect    Center Pan
+	// ---------------------------------
+	//    0,5    reverb       pan_1
+	//    1,6    reverb       pan_2
+	//    2,7    echo         -
+	//    3      echo         -
+	//    4      echo         -
+	
+	// Channel configuration
+	struct config_t {
+		double pan_1;           // -1.0 = left, 0.0 = center, 1.0 = right
+		double pan_2;
+		double echo_delay;      // msec
+		double echo_level;      // 0.0 to 1.0
+		double reverb_delay;    // msec
+		double delay_variance;  // difference between left/right delays (msec)
+		double reverb_level;    // 0.0 to 1.0
+		bool effects_enabled;   // if false, use optimized simple mixer
+		config_t();             // fills in the default values
+	};
+	
+	// Set configuration of buffer
+	virtual void config( const config_t& );
+	void set_depth( double ); // builds a config_t from one depth value; effects are enabled only for depth > 0
+	
+public:
+	~Effects_Buffer();
+	blargg_err_t set_sample_rate( long samples_per_sec, int msec = blip_default_length );
+	void clock_rate( long ); // forwarded to every Blip_Buffer
+	void bass_freq( int ); // forwarded to every Blip_Buffer
+	void clear();
+	channel_t channel( int, int ); // arguments: channel index, channel type
+	void end_frame( blip_time_t );
+	long read_samples( blip_sample_t*, long ); // count must be a multiple of 2 * number of voices
+	long samples_avail() const;
+private:
+	typedef long fixed_t; // NOTE(review): Effects_Buffer.cpp also declares a file-scope fixed_t as blargg_long; confirm both agree
+	int max_voices; // number of voices given to the constructor
+	enum { max_buf_count = 7 }; // Blip_Buffers per voice unless center_only
+	std::vector<Blip_Buffer> bufs; // all voices' buffers, stored voice after voice
+	enum { chan_types_count = 3 }; // channel types per voice
+	std::vector<channel_t> chan_types; // chan_types_count entries per voice, filled in by config()
+	config_t config_; // configuration last passed to config()
+	long stereo_remain; // frames that still need stereo mixing; raised in end_frame(), counted down in read_samples()
+	long effect_remain; // frames that still need effects mixing; raised in end_frame(), counted down in read_samples()
+	int buf_count; // total number of Blip_Buffers in bufs
+	bool effects_enabled; // config_.effects_enabled as of the last end_frame()
+	
+	std::vector<std::vector<blip_sample_t> > reverb_buf; // one reverb delay line per voice
+	std::vector<std::vector<blip_sample_t> > echo_buf; // one echo delay line per voice
+	std::vector<int> reverb_pos; // per-voice write position in reverb_buf
+	std::vector<int> echo_pos; // per-voice write position in echo_buf
+	
+	struct { // internal form of config_, computed by config() when effects are enabled
+		fixed_t pan_1_levels [2];
+		fixed_t pan_2_levels [2];
+		int echo_delay_l;
+		int echo_delay_r;
+		fixed_t echo_level;
+		int reverb_delay_l;
+		int reverb_delay_r;
+		fixed_t reverb_level;
+	} chans;
+	
+	void mix_mono( blip_sample_t*, blargg_long ); // no effects, no stereo
+	void mix_stereo( blip_sample_t*, blargg_long ); // no effects, stereo
+	void mix_enhanced( blip_sample_t*, blargg_long ); // effects and stereo
+	void mix_mono_enhanced( blip_sample_t*, blargg_long ); // effects, no stereo
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Fir_Resampler.cpp b/libraries/game-music-emu/gme/Fir_Resampler.cpp
new file mode 100644
index 000000000..d8dd6837c
--- /dev/null
+++ b/libraries/game-music-emu/gme/Fir_Resampler.cpp
@@ -0,0 +1,199 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Fir_Resampler.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+/* Copyright (C) 2004-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+#undef PI // replace any earlier PI macro with the value below
+#define PI 3.1415926535897932384626433832795029
+
+static void gen_sinc( double rolloff, int width, double offset, double spacing, double scale,
+		int count, short* out ) // writes 'count' filter coefficients to 'out'
+{
+	double const maxh = 256;
+	double const step = PI / maxh * spacing; // angle increment between consecutive coefficients
+	double const to_w = maxh * 2 / width; // converts 'angle' to the window argument w
+	double const pow_a_n = pow( rolloff, maxh );
+	scale /= maxh * 2;
+	// start so that the coefficients are centred around 'offset'
+	double angle = (count / 2 - 1 + offset) * -step;
+	while ( count-- )
+	{
+		*out++ = 0; // coefficients outside the window stay zero
+		double w = angle * to_w;
+		if ( fabs( w ) < PI )
+		{
+			double rolloff_cos_a = rolloff * cos( angle );
+			double num = 1 - rolloff_cos_a -
+					pow_a_n * cos( maxh * angle ) +
+					pow_a_n * rolloff * cos( (maxh - 1) * angle );
+			double den = 1 - rolloff_cos_a - rolloff_cos_a + rolloff * rolloff;
+			double sinc = scale * num / den - scale; // NOTE(review): num/den looks like the closed form of a rolloff-weighted cosine series; confirm
+			// weight by (1 + cos w), which falls to zero at the window edges
+			out [-1] = (short) (cos( w ) * sinc + sinc);
+		}
+		angle += step;
+	}
+}
+
+Fir_Resampler_::Fir_Resampler_( int width, sample_t* impulses_ ) :
+	width_( width ),
+	write_offset( width * stereo - stereo ),
+	impulses( impulses_ )
+{	// Defaults describe a 1:1 ratio with a single impulse phase until time_ratio() is called.
+	write_pos = 0; // no buffer yet; clear() sets it once buf has been sized
+	res       = 1;
+	imp_phase = 0;
+	skip_bits = 0;
+	step      = input_per_cycle = stereo; // input_per_cycle used to be left uninitialized, yet avail_() divides by it
+	ratio_    = 1.0;
+}
+
+Fir_Resampler_::~Fir_Resampler_() { } // empty body: members are destroyed implicitly
+
+void Fir_Resampler_::clear()
+{
+	// Restart at impulse phase 0; with a buffer, zero its write_offset prefix and write after it.
+	imp_phase = 0;
+	if ( !buf.size() )
+		return;
+	memset( buf.begin(), 0, write_offset * sizeof buf [0] );
+	write_pos = &buf [write_offset];
+}
+
+blargg_err_t Fir_Resampler_::buffer_size( int new_size )
+{	// the buffer holds write_offset history samples in front of the new_size writable ones
+	RETURN_ERR( buf.resize( write_offset + new_size ) );
+	clear();
+	return 0;
+}
+	
+double Fir_Resampler_::time_ratio( double new_factor, double rolloff, double gain )
+{
+	ratio_ = new_factor;
+	// Pick the phase count res (1..max_res) whose multiple of the ratio is closest to an integer.
+	double fstep = 0.0;
+	{
+		double least_error = 2;
+		double pos = 0;
+		res = -1;
+		for ( int r = 1; r <= max_res; r++ )
+		{
+			pos += ratio_;
+			double nearest = floor( pos + 0.5 );
+			double error = fabs( pos - nearest );
+			if ( error < least_error )
+			{
+				res = r;
+				fstep = nearest / res;
+				least_error = error;
+			}
+		}
+	}
+	
+	skip_bits = 0;
+	// whole input samples consumed per output pair; the fractional part is handled via skip_bits
+	step = stereo * (int) floor( fstep );
+	
+	ratio_ = fstep; // the ratio actually used, returned to the caller
+	fstep = fmod( fstep, 1.0 );
+	
+	double filter = (ratio_ < 1.0) ? 1.0 : 1.0 / ratio_;
+	double pos = 0.0;
+	input_per_cycle = 0;
+	for ( int i = 0; i < res; i++ ) // one impulse set per phase
+	{
+		gen_sinc( rolloff, int (width_ * filter + 1) & ~1, pos, filter,
+				double (0x7FFF * gain * filter),
+				(int) width_, impulses + i * width_ );
+		
+		pos += fstep;
+		input_per_cycle += step;
+		if ( pos >= 0.9999999 )
+		{
+			pos -= 1.0;
+			skip_bits |= (blargg_ulong) 1 << i; // unsigned shift: i reaches 31 when res == max_res, where a signed "1 << i" overflows
+			input_per_cycle++;
+		}
+	}
+	
+	clear();
+	
+	return ratio_;
+}
+
+int Fir_Resampler_::input_needed( blargg_long output_count ) const
+{ // Returns how many more input samples must be written before output_count output samples can be produced (never negative).
+	blargg_long input_count = 0;
+	// walk the phases from the current one, adding the input each output step consumes
+	unsigned long skip = skip_bits >> imp_phase;
+	int remain = res - imp_phase; // phases left before the skip pattern repeats
+	while ( (output_count -= 2) > 0 )
+	{
+		input_count += step + (skip & 1) * stereo; // phases flagged in skip_bits consume one extra stereo pair
+		skip >>= 1;
+		if ( !--remain )
+		{
+			skip = skip_bits;
+			remain = res;
+		}
+		output_count -= 2; // NOTE(review): second decrement per iteration (the loop condition already subtracts 2); confirm intended
+	}
+	// subtract the input that is already buffered
+	long input_extra = input_count - (write_pos - &buf [(width_ - 1) * stereo]);
+	if ( input_extra < 0 )
+		input_extra = 0;
+	return input_extra;
+}
+
+int Fir_Resampler_::avail_( blargg_long input_count ) const
+{ // Returns the number of output samples that input_count buffered input samples can produce.
+	int cycle_count = input_count / input_per_cycle; // complete passes through all res phases
+	int output_count = cycle_count * res * stereo;
+	input_count -= cycle_count * input_per_cycle;
+	// the remainder is consumed phase by phase, starting at the current phase
+	blargg_ulong skip = skip_bits >> imp_phase;
+	int remain = res - imp_phase;
+	while ( input_count >= 0 )
+	{
+		input_count -= step + (skip & 1) * stereo; // phases flagged in skip_bits consume one extra stereo pair
+		skip >>= 1;
+		if ( !--remain )
+		{
+			skip = skip_bits;
+			remain = res;
+		}
+		output_count += 2; // each phase yields one stereo pair
+	}
+	return output_count;
+}
+
+int Fir_Resampler_::skip_input( long count )
+{	// Drops up to 'count' of the oldest input samples but always keeps width_ * stereo of them; returns the number dropped.
+	int remain = write_pos - buf.begin();
+	int max_count = remain - width_ * stereo;
+	if ( max_count < 0 )
+		max_count = 0; // e.g. right after clear(): a negative limit used to make &buf [count] point before the buffer
+	if ( count > max_count )
+		count = max_count;
+	remain -= count;
+	write_pos = &buf [remain];
+	memmove( buf.begin(), &buf [count], remain * sizeof buf [0] );
+	return count;
+}
diff --git a/libraries/game-music-emu/gme/Fir_Resampler.h b/libraries/game-music-emu/gme/Fir_Resampler.h
new file mode 100644
index 000000000..d637ec41c
--- /dev/null
+++ b/libraries/game-music-emu/gme/Fir_Resampler.h
@@ -0,0 +1,171 @@
+// Finite impulse response (FIR) resampler with adjustable FIR size
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef FIR_RESAMPLER_H
+#define FIR_RESAMPLER_H
+
+#include "blargg_common.h"
+#include <string.h>
+
+class Fir_Resampler_ {
+public:
+	
+	// Use Fir_Resampler<width> (below)
+	
+	// Set input/output resampling ratio and optionally low-pass rolloff and gain.
+	// Returns actual ratio used (rounded to internal precision).
+	double time_ratio( double factor, double rolloff = 0.999, double gain = 1.0 );
+	
+	// Current input/output ratio
+	double ratio() const { return ratio_; }
+	
+// Input
+	
+	typedef short sample_t;
+	
+	// Resize and clear input buffer
+	blargg_err_t buffer_size( int );
+	
+	// Clear input buffer. At least two output samples will be available after
+	// two input samples are written.
+	void clear();
+	
+	// Number of input samples that can be written
+	int max_write() const { return buf.end() - write_pos; }
+	
+	// Pointer to place to write input samples
+	sample_t* buffer() { return write_pos; }
+	
+	// Notify resampler that 'count' input samples have been written
+	void write( long count );
+	
+	// Number of input samples in buffer
+	int written() const { return write_pos - &buf [write_offset]; }
+	
+	// Skip 'count' input samples. Returns number of samples actually skipped.
+	int skip_input( long count );
+	
+// Output
+	
+	// Number of extra input samples needed until 'count' output samples are available
+	int input_needed( blargg_long count ) const;
+	
+	// Number of output samples available
+	int avail() const { return avail_( write_pos - &buf [width_ * stereo] ); }
+	
+public:
+	~Fir_Resampler_();
+protected:
+	enum { stereo = 2 }; // samples per frame (left, right)
+	enum { max_res = 32 }; // rows in the derived class's impulse table
+	blargg_vector<sample_t> buf; // input sample storage
+	sample_t* write_pos; // where the next input sample is written within buf
+	int res; // number of impulse phases per cycle
+	int imp_phase; // current phase, used to index the impulse table
+	int const width_; // FIR width passed to the constructor
+	int const write_offset; // offset in buf from which written() counts
+	blargg_ulong skip_bits; // bit i set: phase i consumes one extra stereo frame
+	int step; // input samples advanced per output frame, before skips
+	int input_per_cycle; // input total accumulated over one cycle by time_ratio()
+	double ratio_; // value returned by ratio()
+	sample_t* impulses; // impulse storage supplied by the derived class
+	
+	Fir_Resampler_( int width, sample_t* );
+	int avail_( blargg_long input_count ) const;
+};
+
+// Width is number of points in FIR. Must be even and 4 or more. More points give
+// better quality and rolloff effectiveness, and take longer to calculate.
+template<int width>
+class Fir_Resampler : public Fir_Resampler_ {
+	BOOST_STATIC_ASSERT( width >= 4 && width % 2 == 0 );
+	short impulses [max_res] [width]; // one width-point impulse per phase; handed to the base class
+public:
+	Fir_Resampler() : Fir_Resampler_( width, impulses [0] ) { }
+	
+	// Read at most 'count' samples. Returns number of samples actually read.
+	typedef short sample_t;
+	int read( sample_t* out, blargg_long count );
+};
+
+// End of public interface
+
+inline void Fir_Resampler_::write( long count )
+{
+	write_pos += count; // only advances the pointer; no samples are copied here
+	assert( write_pos <= buf.end() ); // must stay within the buffer
+}
+
+template<int width>
+int Fir_Resampler<width>::read( sample_t* out_begin, blargg_long count )
+{
+	sample_t* out = out_begin;
+	const sample_t* in = buf.begin();
+	sample_t* end_pos = write_pos;
+	blargg_ulong skip = skip_bits >> imp_phase;
+	sample_t const* imp = impulses [imp_phase];
+	int remain = res - imp_phase;
+	int const step = this->step;
+	// 'count' arrives in samples; the loop below counts stereo frames
+	count >>= 1;
+	// Filtering needs at least one full FIR window (width frames) of buffered input
+	if ( end_pos - in >= width * stereo )
+	{
+		end_pos -= width * stereo;
+		do
+		{
+			count--;
+			
+			// accumulate in extended precision
+			blargg_long l = 0;
+			blargg_long r = 0;
+			
+			const sample_t* i = in;
+			if ( count < 0 )
+				break;
+			
+			for ( int n = width / 2; n; --n ) // two FIR points per iteration
+			{
+				int pt0 = imp [0];
+				l += pt0 * i [0];
+				r += pt0 * i [1];
+				int pt1 = imp [1];
+				imp += 2;
+				l += pt1 * i [2];
+				r += pt1 * i [3];
+				i += 4;
+			}
+			// imp has advanced by 'width', i.e. to the next phase's row of the impulse table
+			remain--;
+			
+			l >>= 15; // presumably undoes the 0x7FFF scale passed to gen_sinc -- TODO confirm
+			r >>= 15;
+			
+			in += (skip * stereo) & stereo; // one extra stereo frame when this phase's skip bit is set
+			skip >>= 1;
+			in += step;
+			
+			if ( !remain ) // end of cycle: restart at phase 0
+			{
+				imp = impulses [0];
+				skip = skip_bits;
+				remain = res;
+			}
+			
+			out [0] = (sample_t) l;
+			out [1] = (sample_t) r;
+			out += 2;
+		}
+		while ( in <= end_pos );
+	}
+	
+	imp_phase = res - remain;
+	// Move the unconsumed input to the start of the buffer
+	int left = write_pos - in;
+	write_pos = &buf [left];
+	memmove( buf.begin(), in, left * sizeof *in );
+	
+	return out - out_begin;
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Gb_Apu.cpp b/libraries/game-music-emu/gme/Gb_Apu.cpp
new file mode 100644
index 000000000..82a9cc1b6
--- /dev/null
+++ b/libraries/game-music-emu/gme/Gb_Apu.cpp
@@ -0,0 +1,306 @@
+// Gb_Snd_Emu 0.1.5. http://www.slack.net/~ant/
+
+#include "Gb_Apu.h"
+
+#include <string.h>
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+unsigned const vol_reg    = 0xFF24; // master volume register
+unsigned const status_reg = 0xFF26; // power/status register
+
+Gb_Apu::Gb_Apu()
+{
+	// Oscillator table, in register order
+	oscs [0] = &square1;
+	oscs [1] = &square2;
+	oscs [2] = &wave;
+	oscs [3] = &noise;
+	
+	// Squares share one synth; wave and noise share the other
+	square1.synth = &square_synth;
+	square2.synth = &square_synth;
+	wave.synth    = &other_synth;
+	noise.synth   = &other_synth;
+	
+	for ( int n = osc_count; n--; )
+	{
+		Gb_Osc* const o = oscs [n];
+		o->regs   = regs + n * 5; // five registers per oscillator
+		o->output = 0;
+		for ( int k = 0; k < 4; k++ )
+			o->outputs [k] = 0; // no buffers assigned yet
+	}
+	
+	set_tempo( 1.0 );
+	volume( 1.0 );
+	reset();
+}
+
+void Gb_Apu::treble_eq( const blip_eq_t& eq )
+{
+	other_synth.treble_eq( eq );  // wave and noise
+	square_synth.treble_eq( eq ); // both squares
+}
+
+void Gb_Apu::osc_output( int index, Blip_Buffer* center, Blip_Buffer* left, Blip_Buffer* right )
+{
+	require( (unsigned) index < osc_count );
+	require( (center && left && right) || (!center && !left && !right) );
+	Gb_Osc* const target = oscs [index];
+	target->outputs [3] = center;
+	target->outputs [2] = left;
+	target->outputs [1] = right; // slot 0 is not touched here
+	target->output = target->outputs [target->output_select]; // re-resolve the active route
+}
+
+void Gb_Apu::output( Blip_Buffer* center, Blip_Buffer* left, Blip_Buffer* right )
+{
+	for ( int n = osc_count; n-- > 0; ) // same routing for every oscillator
+		osc_output( n, center, left, right );
+}
+
+void Gb_Apu::update_volume()
+{
+	// TODO: left/right global volumes aren't handled separately; the larger
+	// 3-bit field drives both synths (a fix would touch all oscillator code)
+	int const packed = regs [vol_reg - start_addr];
+	double const gain = (max( packed & 7, (packed >> 4) & 7 ) + 1) * volume_unit;
+	other_synth.volume( gain );
+	square_synth.volume( gain );
+}
+
+static unsigned char const powerup_regs [0x20] = { // written back from start_addr onward when power is switched off
+	0x80,0x3F,0x00,0xFF,0xBF, // square 1
+	0xFF,0x3F,0x00,0xFF,0xBF, // square 2
+	0x7F,0xFF,0x9F,0xFF,0xBF, // wave
+	0xFF,0xFF,0x00,0x00,0xBF, // noise
+	0x00, // left/right enables
+	0x77, // master volume
+	0x80, // power
+	0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF // 0xFF27-0xFF2F
+};
+
+void Gb_Apu::set_tempo( double t )
+{
+	blip_time_t const base_period = 4194304 / 256; // 256 Hz
+	frame_period = (t == 1.0) ? base_period : blip_time_t (base_period / t);
+	// t == 1.0 uses the exact integer period without a floating-point divide
+}
+
+void Gb_Apu::reset()
+{
+	next_frame_time = 0;
+	last_time       = 0;
+	frame_count     = 0;
+	// Return each oscillator to its reset state
+	square1.reset();
+	square2.reset();
+	wave.reset();
+	noise.reset();
+	noise.bits = 1;
+	wave.wave_pos = 0;
+	
+	// avoid click at beginning
+	regs [vol_reg - start_addr] = 0x77;
+	update_volume();
+	// Writing 0x00 over a differing value takes write_register's power-off path, which loads powerup_regs
+	regs [status_reg - start_addr] = 0x01; // force power
+	write_register( 0, status_reg, 0x00 );
+	
+	static unsigned char const initial_wave [] = {
+		0x84,0x40,0x43,0xAA,0x2D,0x78,0x92,0x3C, // wave table
+		0x60,0x59,0x59,0xB0,0x34,0xB8,0x2E,0xDA
+	};
+	memcpy( wave.wave, initial_wave, sizeof initial_wave ); // NOTE(review): copies packed bytes, while write_register stores one nibble per entry -- confirm
+}
+
+void Gb_Apu::run_until( blip_time_t end_time )
+{
+	require( end_time >= last_time ); // end_time must not be before previous time
+	if ( end_time == last_time )
+		return;
+	// Advance in segments, each ending at the next frame tick or at end_time
+	while ( true )
+	{
+		blip_time_t time = next_frame_time;
+		if ( time > end_time )
+			time = end_time;
+		
+		// run oscillators
+		for ( int i = 0; i < osc_count; ++i )
+		{
+			Gb_Osc& osc = *oscs [i];
+			if ( osc.output )
+			{
+				osc.output->set_modified(); // TODO: misses optimization opportunities?
+				int playing = false; // 0 when silent, -1 when audible
+				if ( osc.enabled && osc.volume &&
+						(!(osc.regs [4] & osc.len_enabled_mask) || osc.length) )
+					playing = -1;
+				switch ( i ) // dispatch on the concrete oscillator type
+				{
+				case 0: square1.run( last_time, time, playing ); break;
+				case 1: square2.run( last_time, time, playing ); break;
+				case 2: wave   .run( last_time, time, playing ); break;
+				case 3: noise  .run( last_time, time, playing ); break;
+				}
+			}
+		}
+		last_time = time;
+		// Reached end_time: stop before applying any frame tick
+		if ( time == end_time )
+			break;
+		
+		next_frame_time += frame_period;
+		
+		// 256 Hz actions
+		square1.clock_length();
+		square2.clock_length();
+		wave.clock_length();
+		noise.clock_length();
+		
+		frame_count = (frame_count + 1) & 3;
+		if ( frame_count == 0 )
+		{
+			// 64 Hz actions
+			square1.clock_envelope();
+			square2.clock_envelope();
+			noise.clock_envelope();
+		}
+		
+		if ( frame_count & 1 )
+			square1.clock_sweep(); // 128 Hz action
+	}
+}
+
+void Gb_Apu::end_frame( blip_time_t end_time )
+{
+	if ( last_time < end_time )
+		run_until( end_time ); // finish emulating this frame
+	
+	// Rebase both clocks so the next frame starts at time 0
+	assert( next_frame_time >= end_time );
+	next_frame_time = next_frame_time - end_time;
+	assert( last_time >= end_time );
+	last_time = last_time - end_time;
+}
+
+void Gb_Apu::write_register( blip_time_t time, unsigned addr, int data )
+{
+	require( (unsigned) data < 0x100 );
+	// Writes outside start_addr..end_addr are ignored
+	int reg = addr - start_addr;
+	if ( (unsigned) reg >= register_count )
+		return;
+	// Bring emulation up to 'time' before the new value takes effect
+	run_until( time );
+	
+	int old_reg = regs [reg];
+	regs [reg] = data;
+	
+	if ( addr < vol_reg )
+	{
+		write_osc( reg / 5, reg, data ); // five registers per oscillator
+	}
+	else if ( addr == vol_reg && data != old_reg ) // global volume
+	{
+		// return all oscs to 0
+		for ( int i = 0; i < osc_count; i++ )
+		{
+			Gb_Osc& osc = *oscs [i];
+			int amp = osc.last_amp;
+			osc.last_amp = 0;
+			if ( amp && osc.enabled && osc.output )
+				other_synth.offset( time, -amp, osc.output );
+		}
+		
+		if ( wave.outputs [3] )
+			other_synth.offset( time, 30, wave.outputs [3] );
+		
+		update_volume();
+		
+		if ( wave.outputs [3] )
+			other_synth.offset( time, -30, wave.outputs [3] );
+		
+		// oscs will update with new amplitude when next run
+	}
+	else if ( addr == 0xFF25 || addr == status_reg ) // left/right enables or power
+	{
+		int mask = (regs [status_reg - start_addr] & 0x80) ? ~0 : 0; // all ones while bit 7 (power) is set
+		int flags = regs [0xFF25 - start_addr] & mask;
+		
+		// left/right assignments
+		for ( int i = 0; i < osc_count; i++ )
+		{
+			Gb_Osc& osc = *oscs [i];
+			osc.enabled &= mask;
+			int bits = flags >> i;
+			Blip_Buffer* old_output = osc.output;
+			osc.output_select = (bits >> 3 & 2) | (bits & 1); // flag bit i -> 1, flag bit i+4 -> 2
+			osc.output = osc.outputs [osc.output_select];
+			if ( osc.output != old_output )
+			{
+				int amp = osc.last_amp;
+				osc.last_amp = 0;
+				if ( amp && old_output )
+					other_synth.offset( time, -amp, old_output ); // cancel the level left on the old buffer
+			}
+		}
+		
+		if ( addr == status_reg && data != old_reg )
+		{
+			if ( !(data & 0x80) )
+			{
+				for ( unsigned i = 0; i < sizeof powerup_regs; i++ ) // powering off: rewrite every other register
+				{
+					if ( i != status_reg - start_addr )
+						write_register( time, i + start_addr, powerup_regs [i] );
+				}
+			}
+			else
+			{
+				//debug_printf( "APU powered on\n" );
+			}
+		}
+	}
+	else if ( addr >= 0xFF30 ) // wave table: each byte holds two 4-bit samples
+	{
+		int index = (addr & 0x0F) * 2;
+		wave.wave [index] = data >> 4;
+		wave.wave [index + 1] = data & 0x0F;
+	}
+}
+
+int Gb_Apu::read_register( blip_time_t time, unsigned addr )
+{
+	run_until( time );
+	
+	int index = addr - start_addr;
+	require( (unsigned) index < register_count );
+	if ( addr != status_reg )
+		return regs [index]; // every other register returns its stored byte
+	
+	// Status: keep bit 7, force bits 4-6 on, add one bit per active oscillator
+	int status = (regs [index] & 0x80) | 0x70;
+	for ( int n = osc_count; n--; )
+	{
+		Gb_Osc const* const o = oscs [n];
+		bool const length_ok = o->length || !(o->regs [4] & o->len_enabled_mask);
+		if ( o->enabled && length_ok )
+			status |= 1 << n;
+	}
+	
+	return status;
+}
diff --git a/libraries/game-music-emu/gme/Gb_Apu.h b/libraries/game-music-emu/gme/Gb_Apu.h
new file mode 100644
index 000000000..9b251262f
--- /dev/null
+++ b/libraries/game-music-emu/gme/Gb_Apu.h
@@ -0,0 +1,90 @@
+// Nintendo Game Boy PAPU sound chip emulator
+
+// Gb_Snd_Emu 0.1.5
+#ifndef GB_APU_H
+#define GB_APU_H
+
+#include "Gb_Oscs.h"
+
+class Gb_Apu {
+public:
+	
+	// Set overall volume of all oscillators, where 1.0 is full volume
+	void volume( double );
+	
+	// Set treble equalization
+	void treble_eq( const blip_eq_t& );
+	
+	// Outputs can be assigned to a single buffer for mono output, or to three
+	// buffers for stereo output (using Stereo_Buffer to do the mixing).
+	
+	// Assign all oscillator outputs to specified buffer(s). If buffer
+	// is NULL, silences all oscillators.
+	void output( Blip_Buffer* mono );
+	void output( Blip_Buffer* center, Blip_Buffer* left, Blip_Buffer* right );
+	
+	// Assign single oscillator output to buffer(s). Valid indices are 0 to 3,
+	// which refer to Square 1, Square 2, Wave, and Noise. If buffer is NULL,
+	// silences oscillator.
+	enum { osc_count = 4 };
+	void osc_output( int index, Blip_Buffer* mono );
+	void osc_output( int index, Blip_Buffer* center, Blip_Buffer* left, Blip_Buffer* right );
+	
+	// Reset oscillators and internal state
+	void reset();
+	
+	// Reads and writes at addr must satisfy start_addr <= addr <= end_addr
+	enum { start_addr = 0xFF10 };
+	enum { end_addr   = 0xFF3F };
+	enum { register_count = end_addr - start_addr + 1 };
+	
+	// Write 'data' to address at specified time
+	void write_register( blip_time_t, unsigned addr, int data );
+	
+	// Read from address at specified time
+	int read_register( blip_time_t, unsigned addr );
+	
+	// Run all oscillators up to specified time, end current time frame, then
+	// start a new frame at time 0.
+	void end_frame( blip_time_t );
+	
+	void set_tempo( double );
+	
+public:
+	Gb_Apu();
+private:
+	// noncopyable
+	Gb_Apu( const Gb_Apu& );
+	Gb_Apu& operator = ( const Gb_Apu& );
+	
+	Gb_Osc*     oscs [osc_count]; // square1, square2, wave, noise (set in the constructor)
+	blip_time_t   next_frame_time; // time of the next frame tick in run_until()
+	blip_time_t   last_time; // time the oscillators have been run up to
+	blip_time_t frame_period; // clocks between frame ticks (set by set_tempo)
+	double      volume_unit; // gain per volume step (set by volume())
+	int         frame_count; // frame tick counter, modulo 4
+	
+	Gb_Square   square1;
+	Gb_Square   square2;
+	Gb_Wave     wave;
+	Gb_Noise    noise;
+	uint8_t regs [register_count]; // raw register bytes, indexed by addr - start_addr
+	Gb_Square::Synth square_synth; // used by squares
+	Gb_Wave::Synth   other_synth;  // used by wave and noise
+	
+	void update_volume();
+	void run_until( blip_time_t );
+	void write_osc( int index, int reg, int data );
+};
+
+inline void Gb_Apu::output( Blip_Buffer* b ) { output( b, b, b ); } // mono: one buffer serves as center, left and right
+	
+inline void Gb_Apu::osc_output( int i, Blip_Buffer* b ) { osc_output( i, b, b, b ); } // mono: one buffer serves as center, left and right
+
+inline void Gb_Apu::volume( double vol )
+{
+	volume_unit = 0.60 / osc_count / 15 /*steps*/ / 2 /*?*/ / 8 /*master vol range*/ * vol;
+	update_volume(); // push the new unit into both synths
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Gb_Cpu.cpp b/libraries/game-music-emu/gme/Gb_Cpu.cpp
new file mode 100644
index 000000000..db1abee58
--- /dev/null
+++ b/libraries/game-music-emu/gme/Gb_Cpu.cpp
@@ -0,0 +1,1054 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Gb_Cpu.h"
+
+#include <string.h>
+
+//#include "gb_cpu_log.h"
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "gb_cpu_io.h"
+
+#include "blargg_source.h"
+
+// Common instructions:
+//
+// 365880   FA      LD  A,IND16
+// 355863   20      JR  NZ
+// 313655   21      LD  HL,IMM
+// 274580   28      JR  Z
+// 252878   FE      CMP IMM
+// 230541   7E      LD  A,(HL)
+// 226209   2A      LD A,(HL+)
+// 217467   CD      CALL
+// 212034   C9      RET
+// 208376   CB      CB prefix
+//
+//  27486   CB 7E   BIT 7,(HL)
+//  15925   CB 76   BIT 6,(HL)
+//  13035   CB 19   RR  C
+//  11557   CB 7F   BIT 7,A
+//  10898   CB 37   SWAP A
+//  10208   CB 66   BIT 4,(HL)
+
+#if BLARGG_NONPORTABLE // page pointers are indexed by the full address
+	#define PAGE_OFFSET( addr ) (addr)
+#else // otherwise index by the offset within the page
+	#define PAGE_OFFSET( addr ) ((addr) & (page_size - 1))
+#endif
+
+inline void Gb_Cpu::set_code_page( int i, uint8_t* p )
+{
+	state->code_map [i] = p - PAGE_OFFSET( i * (blargg_long) page_size ); // pre-biased so READ_PROG can index with PAGE_OFFSET( addr )
+}
+
+void Gb_Cpu::reset( void* unmapped )
+{
+	check( state == &state_ );
+	state = &state_;
+	state_.remain = 0;
+	
+	// Point every page, including the extra one at index page_count, at 'unmapped'
+	uint8_t* const fill = (uint8_t*) unmapped;
+	for ( int page = page_count; page >= 0; page-- )
+		set_code_page( page, fill );
+	
+	memset( &r, 0, sizeof r ); // zero the register file
+	
+	blargg_verify_byte_order();
+}
+
+void Gb_Cpu::map_code( gb_addr_t start, unsigned size, void* data )
+{
+	// both the start and the length of the range must be page-aligned
+	require( start % page_size == 0 );
+	require( size % page_size == 0 );
+	
+	unsigned const base_page = start / page_size;
+	for ( unsigned n = 0, count = size / page_size; n < count; n++ )
+		set_code_page( base_page + n, (uint8_t*) data + n * page_size );
+}
+
+#define READ( addr )            CPU_READ( this, (addr), s.remain )
+#define WRITE( addr, data )     {CPU_WRITE( this, (addr), (data), s.remain );}
+#define READ_FAST( addr, out )  CPU_READ_FAST( this, (addr), s.remain, out )
+#define READ_PROG( addr )       (s.code_map [(addr) >> page_shift] [PAGE_OFFSET( addr )])
+// CPU flag bits; run() checks that 'flags' only uses the upper nibble
+unsigned const z_flag = 0x80; // zero: set when a result's low byte is 0
+unsigned const n_flag = 0x40; // set by the subtract/compare/decrement paths
+unsigned const h_flag = 0x20; // half-carry: derived from the low nibbles
+unsigned const c_flag = 0x10; // carry: bit carried or shifted out
+
+bool Gb_Cpu::run( blargg_long cycle_count )
+{
+	state_.remain = blargg_ulong (cycle_count + clocks_per_instr) / clocks_per_instr;
+	state_t s;
+	this->state = &s;
+	memcpy( &s, &this->state_, sizeof s );
+	
+#if BLARGG_BIG_ENDIAN
+	#define R8( n ) (r8_ [n]) 
+#elif BLARGG_LITTLE_ENDIAN
+	#define R8( n ) (r8_ [(n) ^ 1]) 
+#else
+	#error "Byte order of CPU must be known"
+#endif
+	
+	union {
+		core_regs_t rg; // individual registers
+		
+		struct {
+			uint16_t bc, de, hl, unused; // pairs
+		} rp;
+		
+		uint8_t r8_ [8]; // indexed registers (use R8 macro due to endian dependence)
+		uint16_t r16 [4]; // indexed pairs
+	};
+	BOOST_STATIC_ASSERT( sizeof rg == 8 && sizeof rp == 8 );
+	
+	rg = r;
+	unsigned pc = r.pc;
+	unsigned sp = r.sp;
+	unsigned flags = r.flags;
+	
+loop:
+	
+	check( (unsigned long) pc < 0x10000 );
+	check( (unsigned long) sp < 0x10000 );
+	check( (flags & ~0xF0) == 0 );
+	
+	uint8_t const* instr = s.code_map [pc >> page_shift];
+	unsigned op;
+	
+	// TODO: eliminate this special case
+	#if BLARGG_NONPORTABLE
+		op = instr [pc];
+		pc++;
+		instr += pc;
+	#else
+		instr += PAGE_OFFSET( pc );
+		op = *instr++;
+		pc++;
+	#endif
+	
+#define GET_ADDR()  GET_LE16( instr )
+	
+	if ( !--s.remain )
+		goto stop;
+	
+	unsigned data;
+	data = *instr;
+	
+	#ifdef GB_CPU_LOG_H
+		gb_cpu_log( "new", pc - 1, op, data, instr [1] );
+	#endif
+	
+	switch ( op )
+	{
+
+// TODO: more efficient way to handle negative branch that wraps PC around
+#define BRANCH( cond )\
+{\
+	pc++;\
+	int offset = (int8_t) data;\
+	if ( !(cond) ) goto loop;\
+	pc = uint16_t (pc + offset);\
+	goto loop;\
+}
+
+// Most Common
+
+	case 0x20: // JR NZ
+		BRANCH( !(flags & z_flag) )
+	
+	case 0x21: // LD HL,IMM (common)
+		rp.hl = GET_ADDR();
+		pc += 2;
+		goto loop;
+	
+	case 0x28: // JR Z
+		BRANCH( flags & z_flag )
+	
+	{
+		unsigned temp;
+	case 0xF0: // LD A,(0xFF00+imm)
+		temp = data | 0xFF00;
+		pc++;
+		goto ld_a_ind_comm;
+	
+	case 0xF2: // LD A,(0xFF00+C)
+		temp = rg.c | 0xFF00;
+		goto ld_a_ind_comm;
+	
+	case 0x0A: // LD A,(BC)
+		temp = rp.bc;
+		goto ld_a_ind_comm;
+	
+	case 0x3A: // LD A,(HL-)
+		temp = rp.hl;
+		rp.hl = temp - 1;
+		goto ld_a_ind_comm;
+	
+	case 0x1A: // LD A,(DE)
+		temp = rp.de;
+		goto ld_a_ind_comm;
+	
+	case 0x2A: // LD A,(HL+) (common)
+		temp = rp.hl;
+		rp.hl = temp + 1;
+		goto ld_a_ind_comm;
+		
+	case 0xFA: // LD A,IND16 (common)
+		temp = GET_ADDR();
+		pc += 2;
+	ld_a_ind_comm:
+		READ_FAST( temp, rg.a );
+		goto loop;
+	}
+	
+	case 0xBE: // CMP (HL)
+		data = READ( rp.hl );
+		goto cmp_comm;
+	
+	case 0xB8: // CMP B
+	case 0xB9: // CMP C
+	case 0xBA: // CMP D
+	case 0xBB: // CMP E
+	case 0xBC: // CMP H
+	case 0xBD: // CMP L
+		data = R8( op & 7 );
+		goto cmp_comm;
+	
+	case 0xFE: // CMP IMM
+		pc++;
+	cmp_comm:
+		op = rg.a;
+		data = op - data;
+	sub_set_flags:
+		flags = ((op & 15) - (data & 15)) & h_flag;
+		flags |= (data >> 4) & c_flag;
+		flags |= n_flag;
+		if ( data & 0xFF )
+			goto loop;
+		flags |= z_flag;
+		goto loop;
+
+	case 0x46: // LD B,(HL)
+	case 0x4E: // LD C,(HL)
+	case 0x56: // LD D,(HL)
+	case 0x5E: // LD E,(HL)
+	case 0x66: // LD H,(HL)
+	case 0x6E: // LD L,(HL)
+	case 0x7E:{// LD A,(HL)
+		unsigned addr = rp.hl;
+		READ_FAST( addr, R8( (op >> 3) & 7 ) );
+		goto loop;
+	}
+	
+	case 0xC4: // CNZ (next-most-common)
+		pc += 2;
+		if ( flags & z_flag )
+			goto loop;
+	call:
+		pc -= 2;
+	case 0xCD: // CALL (most-common)
+		data = pc + 2;
+		pc = GET_ADDR();
+	push:
+		sp = (sp - 1) & 0xFFFF;
+		WRITE( sp, data >> 8 );
+		sp = (sp - 1) & 0xFFFF;
+		WRITE( sp, data & 0xFF );
+		goto loop;
+	
+	case 0xC8: // RNZ (next-most-common)
+		if ( !(flags & z_flag) )
+			goto loop;
+	case 0xC9: // RET (most common)
+	ret:
+		pc = READ( sp );
+		pc += 0x100 * READ( sp + 1 );
+		sp = (sp + 2) & 0xFFFF;
+		goto loop;
+	
+	case 0x00: // NOP
+	case 0x40: // LD B,B
+	case 0x49: // LD C,C
+	case 0x52: // LD D,D
+	case 0x5B: // LD E,E
+	case 0x64: // LD H,H
+	case 0x6D: // LD L,L
+	case 0x7F: // LD A,A
+		goto loop;
+	
+// CB Instructions
+
+	case 0xCB:
+		pc++;
+		// now data is the opcode
+		switch ( data ) {
+			
+		{
+			int temp;
+		case 0x46: // BIT b,(HL)
+		case 0x4E:
+		case 0x56:
+		case 0x5E:
+		case 0x66:
+		case 0x6E:
+		case 0x76:
+		case 0x7E:
+			{
+				unsigned addr = rp.hl;
+				READ_FAST( addr, temp );
+				goto bit_comm;
+			}
+		
+		case 0x40: case 0x41: case 0x42: case 0x43: // BIT b,r
+		case 0x44: case 0x45: case 0x47: case 0x48:
+		case 0x49: case 0x4A: case 0x4B: case 0x4C:
+		case 0x4D: case 0x4F: case 0x50: case 0x51:
+		case 0x52: case 0x53: case 0x54: case 0x55:
+		case 0x57: case 0x58: case 0x59: case 0x5A:
+		case 0x5B: case 0x5C: case 0x5D: case 0x5F:
+		case 0x60: case 0x61: case 0x62: case 0x63:
+		case 0x64: case 0x65: case 0x67: case 0x68:
+		case 0x69: case 0x6A: case 0x6B: case 0x6C:
+		case 0x6D: case 0x6F: case 0x70: case 0x71:
+		case 0x72: case 0x73: case 0x74: case 0x75:
+		case 0x77: case 0x78: case 0x79: case 0x7A:
+		case 0x7B: case 0x7C: case 0x7D: case 0x7F:
+			temp = R8( data & 7 );
+		bit_comm:
+			int bit = (~data >> 3) & 7;
+			flags &= ~n_flag;
+			flags |= h_flag | z_flag;
+			flags ^= (temp << bit) & z_flag;
+			goto loop;
+		}
+		
+		case 0x86: // RES b,(HL)
+		case 0x8E:
+		case 0x96:
+		case 0x9E:
+		case 0xA6:
+		case 0xAE:
+		case 0xB6:
+		case 0xBE:
+		case 0xC6: // SET b,(HL)
+		case 0xCE:
+		case 0xD6:
+		case 0xDE:
+		case 0xE6:
+		case 0xEE:
+		case 0xF6:
+		case 0xFE: {
+			int temp = READ( rp.hl );
+			int bit = 1 << ((data >> 3) & 7);
+			temp &= ~bit;
+			if ( !(data & 0x40) )
+				bit = 0;
+			WRITE( rp.hl, temp | bit );
+			goto loop;
+		}
+		
+		case 0xC0: case 0xC1: case 0xC2: case 0xC3: // SET b,r
+		case 0xC4: case 0xC5: case 0xC7: case 0xC8:
+		case 0xC9: case 0xCA: case 0xCB: case 0xCC:
+		case 0xCD: case 0xCF: case 0xD0: case 0xD1:
+		case 0xD2: case 0xD3: case 0xD4: case 0xD5:
+		case 0xD7: case 0xD8: case 0xD9: case 0xDA:
+		case 0xDB: case 0xDC: case 0xDD: case 0xDF:
+		case 0xE0: case 0xE1: case 0xE2: case 0xE3:
+		case 0xE4: case 0xE5: case 0xE7: case 0xE8:
+		case 0xE9: case 0xEA: case 0xEB: case 0xEC:
+		case 0xED: case 0xEF: case 0xF0: case 0xF1:
+		case 0xF2: case 0xF3: case 0xF4: case 0xF5:
+		case 0xF7: case 0xF8: case 0xF9: case 0xFA:
+		case 0xFB: case 0xFC: case 0xFD: case 0xFF:
+			R8( data & 7 ) |= 1 << ((data >> 3) & 7);
+			goto loop;
+
+		case 0x80: case 0x81: case 0x82: case 0x83: // RES b,r
+		case 0x84: case 0x85: case 0x87: case 0x88:
+		case 0x89: case 0x8A: case 0x8B: case 0x8C:
+		case 0x8D: case 0x8F: case 0x90: case 0x91:
+		case 0x92: case 0x93: case 0x94: case 0x95:
+		case 0x97: case 0x98: case 0x99: case 0x9A:
+		case 0x9B: case 0x9C: case 0x9D: case 0x9F:
+		case 0xA0: case 0xA1: case 0xA2: case 0xA3:
+		case 0xA4: case 0xA5: case 0xA7: case 0xA8:
+		case 0xA9: case 0xAA: case 0xAB: case 0xAC:
+		case 0xAD: case 0xAF: case 0xB0: case 0xB1:
+		case 0xB2: case 0xB3: case 0xB4: case 0xB5:
+		case 0xB7: case 0xB8: case 0xB9: case 0xBA:
+		case 0xBB: case 0xBC: case 0xBD: case 0xBF:
+			R8( data & 7 ) &= ~(1 << ((data >> 3) & 7));
+			goto loop;
+		
+		{
+			int temp;
+		case 0x36: // SWAP (HL)
+			temp = READ( rp.hl );
+			goto swap_comm;
+		
+		case 0x30: // SWAP B
+		case 0x31: // SWAP C
+		case 0x32: // SWAP D
+		case 0x33: // SWAP E
+		case 0x34: // SWAP H
+		case 0x35: // SWAP L
+		case 0x37: // SWAP A
+			temp = R8( data & 7 );
+		swap_comm:
+			op = (temp >> 4) | (temp << 4);
+			flags = 0;
+			goto shift_comm;
+		}
+		
+// Shift/Rotate
+
+		case 0x06: // RLC (HL)
+		case 0x16: // RL (HL)
+		case 0x26: // SLA (HL)
+			op = READ( rp.hl );
+			goto rl_comm;
+		
+		case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x27: // SLA A
+		case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x07: // RLC A
+		case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x17: // RL A
+			op = R8( data & 7 );
+			goto rl_comm;
+		
+		case 0x3E: // SRL (HL)
+			data += 0x10; // bump up to 0x4n to avoid preserving sign bit
+		case 0x1E: // RR (HL)
+		case 0x0E: // RRC (HL)
+		case 0x2E: // SRA (HL)
+			op = READ( rp.hl );
+			goto rr_comm;
+		
+		case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3F: // SRL A
+			data += 0x10; // bump up to 0x4n
+		case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1F: // RR A
+		case 0x08: case 0x09: case 0x0A: case 0x0B: case 0x0C: case 0x0D: case 0x0F: // RRC A
+		case 0x28: case 0x29: case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2F: // SRA A
+			op = R8( data & 7 );
+			goto rr_comm;
+		
+	} // CB op
+	assert( false ); // unhandled CB op
+
+	case 0x07: // RLCA
+	case 0x17: // RLA
+		data = op;
+		op = rg.a;
+	rl_comm:
+		op <<= 1;
+		op |= ((data & flags) >> 4) & 1; // RL and carry is set
+		flags = (op >> 4) & c_flag; // C = bit shifted out
+		if ( data < 0x10 ) // RLC
+			op |= op >> 8;
+		// SLA doesn't fill lower bit
+		goto shift_comm;
+	
+	case 0x0F: // RRCA
+	case 0x1F: // RRA
+		data = op;
+		op = rg.a;
+	rr_comm:
+		op |= (data & flags) << 4; // RR and carry is set
+		flags = (op << 4) & c_flag; // C = bit shifted out
+		if ( data < 0x10 ) // RRC
+			op |= op << 8;
+		op >>= 1;
+		if ( data & 0x20 ) // SRA propagates sign bit
+			op |= (op << 1) & 0x80;
+	shift_comm:
+		data &= 7;
+		if ( !(op & 0xFF) )
+			flags |= z_flag;
+		if ( data == 6 )
+			goto write_hl_op_ff;
+		R8( data ) = op;
+		goto loop;
+
+// Load
+
+	case 0x70: // LD (HL),B
+	case 0x71: // LD (HL),C
+	case 0x72: // LD (HL),D
+	case 0x73: // LD (HL),E
+	case 0x74: // LD (HL),H
+	case 0x75: // LD (HL),L
+	case 0x77: // LD (HL),A
+		op = R8( op & 7 );
+	write_hl_op_ff:
+		WRITE( rp.hl, op & 0xFF );
+		goto loop;
+
+	case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x47: // LD r,r
+	case 0x48: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4F:
+	case 0x50: case 0x51: case 0x53: case 0x54: case 0x55: case 0x57:
+	case 0x58: case 0x59: case 0x5A: case 0x5C: case 0x5D: case 0x5F:
+	case 0x60: case 0x61: case 0x62: case 0x63: case 0x65: case 0x67:
+	case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6F:
+	case 0x78: case 0x79: case 0x7A: case 0x7B: case 0x7C: case 0x7D:
+		R8( (op >> 3) & 7 ) = R8( op & 7 );
+		goto loop;
+
+	case 0x08: // LD IND16,SP
+		data = GET_ADDR();
+		pc += 2;
+		WRITE( data, sp&0xFF );
+		data++;
+		WRITE( data, sp >> 8 );
+		goto loop;
+	
+	case 0xF9: // LD SP,HL
+		sp = rp.hl;
+		goto loop;
+
+	case 0x31: // LD SP,IMM
+		sp = GET_ADDR();
+		pc += 2;
+		goto loop;
+	
+	case 0x01: // LD BC,IMM
+	case 0x11: // LD DE,IMM
+		r16 [op >> 4] = GET_ADDR();
+		pc += 2;
+		goto loop;
+	
+	{
+		unsigned temp;
+	case 0xE0: // LD (0xFF00+imm),A
+		temp = data | 0xFF00;
+		pc++;
+		goto write_data_rg_a;
+	
+	case 0xE2: // LD (0xFF00+C),A
+		temp = rg.c | 0xFF00;
+		goto write_data_rg_a;
+
+	case 0x32: // LD (HL-),A
+		temp = rp.hl;
+		rp.hl = temp - 1;
+		goto write_data_rg_a;
+	
+	case 0x02: // LD (BC),A
+		temp = rp.bc;
+		goto write_data_rg_a;
+	
+	case 0x12: // LD (DE),A
+		temp = rp.de;
+		goto write_data_rg_a;
+	
+	case 0x22: // LD (HL+),A
+		temp = rp.hl;
+		rp.hl = temp + 1;
+		goto write_data_rg_a;
+		
+	case 0xEA: // LD IND16,A (common)
+		temp = GET_ADDR();
+		pc += 2;
+	write_data_rg_a:
+		WRITE( temp, rg.a );
+		goto loop;
+	}
+	
+	case 0x06: // LD B,IMM
+		rg.b = data;
+		pc++;
+		goto loop;
+	
+	case 0x0E: // LD C,IMM
+		rg.c = data;
+		pc++;
+		goto loop;
+	
+	case 0x16: // LD D,IMM
+		rg.d = data;
+		pc++;
+		goto loop;
+	
+	case 0x1E: // LD E,IMM
+		rg.e = data;
+		pc++;
+		goto loop;
+	
+	case 0x26: // LD H,IMM
+		rg.h = data;
+		pc++;
+		goto loop;
+	
+	case 0x2E: // LD L,IMM
+		rg.l = data;
+		pc++;
+		goto loop;
+	
+	case 0x36: // LD (HL),IMM
+		WRITE( rp.hl, data );
+		pc++;
+		goto loop;
+	
+	case 0x3E: // LD A,IMM
+		rg.a = data;
+		pc++;
+		goto loop;
+
+// Increment/Decrement
+
+	case 0x03: // INC BC
+	case 0x13: // INC DE
+	case 0x23: // INC HL
+		r16 [op >> 4]++;
+		goto loop;
+	
+	case 0x33: // INC SP
+		sp = (sp + 1) & 0xFFFF;
+		goto loop;
+
+	case 0x0B: // DEC BC
+	case 0x1B: // DEC DE
+	case 0x2B: // DEC HL
+		r16 [op >> 4]--;
+		goto loop;
+	
+	case 0x3B: // DEC SP
+		sp = (sp - 1) & 0xFFFF;
+		goto loop;
+	
+	case 0x34: // INC (HL)
+		op = rp.hl;
+		data = READ( op );
+		data++;
+		WRITE( op, data & 0xFF );
+		goto inc_comm;
+	
+	case 0x04: // INC B
+	case 0x0C: // INC C (common)
+	case 0x14: // INC D
+	case 0x1C: // INC E
+	case 0x24: // INC H
+	case 0x2C: // INC L
+	case 0x3C: // INC A
+		op = (op >> 3) & 7;
+		R8( op ) = data = R8( op ) + 1;
+	inc_comm:
+		flags = (flags & c_flag) | (((data & 15) - 1) & h_flag) | ((data >> 1) & z_flag);
+		goto loop;
+	
+	case 0x35: // DEC (HL)
+		op = rp.hl;
+		data = READ( op );
+		data--;
+		WRITE( op, data & 0xFF );
+		goto dec_comm;
+	
+	case 0x05: // DEC B
+	case 0x0D: // DEC C
+	case 0x15: // DEC D
+	case 0x1D: // DEC E
+	case 0x25: // DEC H
+	case 0x2D: // DEC L
+	case 0x3D: // DEC A
+		op = (op >> 3) & 7;
+		data = R8( op ) - 1;
+		R8( op ) = data;
+	dec_comm:
+		flags = (flags & c_flag) | n_flag | (((data & 15) + 0x31) & h_flag);
+		if ( data & 0xFF )
+			goto loop;
+		flags |= z_flag;
+		goto loop;
+
+// Add 16-bit
+
+	{
+		blargg_ulong temp; // need more than 16 bits for carry
+		unsigned prev;
+		
+	case 0xF8: // LD HL,SP+imm
+		temp = int8_t (data); // sign-extend to 16 bits
+		pc++;
+		flags = 0;
+		temp += sp;
+		prev = sp;
+		goto add_16_hl;
+	
+	case 0xE8: // ADD SP,IMM
+		temp = int8_t (data); // sign-extend to 16 bits
+		pc++;
+		flags = 0;
+		temp += sp;
+		prev = sp;
+		sp = temp & 0xFFFF;
+		goto add_16_comm;
+
+	case 0x39: // ADD HL,SP
+		temp = sp;
+		goto add_hl_comm;
+	
+	case 0x09: // ADD HL,BC
+	case 0x19: // ADD HL,DE
+	case 0x29: // ADD HL,HL
+		temp = r16 [op >> 4];
+	add_hl_comm:
+		prev = rp.hl;
+		temp += prev;
+		flags &= z_flag;
+	add_16_hl:
+		rp.hl = temp;
+	add_16_comm:
+		flags |= (temp >> 12) & c_flag;
+		flags |= (((temp & 0x0FFF) - (prev & 0x0FFF)) >> 7) & h_flag;
+		goto loop;
+	}
+	
+	case 0x86: // ADD (HL)
+		data = READ( rp.hl );
+		goto add_comm;
+	
+	case 0x80: // ADD B
+	case 0x81: // ADD C
+	case 0x82: // ADD D
+	case 0x83: // ADD E
+	case 0x84: // ADD H
+	case 0x85: // ADD L
+	case 0x87: // ADD A
+		data = R8( op & 7 );
+		goto add_comm;
+	
+	case 0xC6: // ADD IMM
+		pc++;
+	add_comm:
+		flags = rg.a;
+		data += flags;
+		flags = ((data & 15) - (flags & 15)) & h_flag;
+		flags |= (data >> 4) & c_flag;
+		rg.a = data;
+		if ( data & 0xFF )
+			goto loop;
+		flags |= z_flag;
+		goto loop;
+
+// Add/Subtract
+
+	case 0x8E: // ADC (HL)
+		data = READ( rp.hl );
+		goto adc_comm;
+	
+	case 0x88: // ADC B
+	case 0x89: // ADC C
+	case 0x8A: // ADC D
+	case 0x8B: // ADC E
+	case 0x8C: // ADC H
+	case 0x8D: // ADC L
+	case 0x8F: // ADC A
+		data = R8( op & 7 );
+		goto adc_comm;
+	
+	case 0xCE: // ADC IMM
+		pc++;
+	adc_comm:
+		data += (flags >> 4) & 1;
+		data &= 0xFF; // to do: does carry get set when sum + carry = 0x100?
+		goto add_comm;
+
+	case 0x96: // SUB (HL)
+		data = READ( rp.hl );
+		goto sub_comm;
+	
+	case 0x90: // SUB B
+	case 0x91: // SUB C
+	case 0x92: // SUB D
+	case 0x93: // SUB E
+	case 0x94: // SUB H
+	case 0x95: // SUB L
+	case 0x97: // SUB A
+		data = R8( op & 7 );
+		goto sub_comm;
+	
+	case 0xD6: // SUB IMM
+		pc++;
+	sub_comm:
+		op = rg.a;
+		data = op - data;
+		rg.a = data;
+		goto sub_set_flags;
+
+	case 0x9E: // SBC (HL)
+		data = READ( rp.hl );
+		goto sbc_comm;
+	
+	case 0x98: // SBC B
+	case 0x99: // SBC C
+	case 0x9A: // SBC D
+	case 0x9B: // SBC E
+	case 0x9C: // SBC H
+	case 0x9D: // SBC L
+	case 0x9F: // SBC A
+		data = R8( op & 7 );
+		goto sbc_comm;
+	
+	case 0xDE: // SBC IMM
+		pc++;
+	sbc_comm:
+		data += (flags >> 4) & 1;
+		data &= 0xFF; // to do: does carry get set when sum + carry = 0x100?
+		goto sub_comm;
+
+// Logical
+
+	case 0xA0: // AND B
+	case 0xA1: // AND C
+	case 0xA2: // AND D
+	case 0xA3: // AND E
+	case 0xA4: // AND H
+	case 0xA5: // AND L
+		data = R8( op & 7 );
+		goto and_comm;
+	
+	case 0xA6: // AND (HL)
+		data = READ( rp.hl );
+		pc--;
+	case 0xE6: // AND IMM
+		pc++;
+	and_comm:
+		rg.a &= data;
+	case 0xA7: // AND A
+		flags = h_flag | (((rg.a - 1) >> 1) & z_flag);
+		goto loop;
+
+	case 0xB0: // OR B
+	case 0xB1: // OR C
+	case 0xB2: // OR D
+	case 0xB3: // OR E
+	case 0xB4: // OR H
+	case 0xB5: // OR L
+		data = R8( op & 7 );
+		goto or_comm;
+	
+	case 0xB6: // OR (HL)
+		data = READ( rp.hl );
+		pc--;
+	case 0xF6: // OR IMM
+		pc++;
+	or_comm:
+		rg.a |= data;
+	case 0xB7: // OR A
+		flags = ((rg.a - 1) >> 1) & z_flag;
+		goto loop;
+
+	case 0xA8: // XOR B
+	case 0xA9: // XOR C
+	case 0xAA: // XOR D
+	case 0xAB: // XOR E
+	case 0xAC: // XOR H
+	case 0xAD: // XOR L
+		data = R8( op & 7 );
+		goto xor_comm;
+	
+	case 0xAE: // XOR (HL)
+		data = READ( rp.hl );
+		pc--;
+	case 0xEE: // XOR IMM
+		pc++;
+	xor_comm:
+		data ^= rg.a;
+		rg.a = data;
+		data--;
+		flags = (data >> 1) & z_flag;
+		goto loop;
+	
+	case 0xAF: // XOR A
+		rg.a = 0;
+		flags = z_flag;
+		goto loop;
+
+// Stack
+
+	case 0xF1: // POP FA
+	case 0xC1: // POP BC
+	case 0xD1: // POP DE
+	case 0xE1: // POP HL (common)
+		data = READ( sp );
+		r16 [(op >> 4) & 3] = data + 0x100 * READ( sp + 1 );
+		sp = (sp + 2) & 0xFFFF;
+		if ( op != 0xF1 )
+			goto loop;
+		flags = rg.flags & 0xF0;
+		goto loop;
+	
+	case 0xC5: // PUSH BC
+		data = rp.bc;
+		goto push;
+	
+	case 0xD5: // PUSH DE
+		data = rp.de;
+		goto push;
+	
+	case 0xE5: // PUSH HL
+		data = rp.hl;
+		goto push;
+	
+	case 0xF5: // PUSH FA
+		data = (flags << 8) | rg.a;
+		goto push;
+
+// Flow control
+	
+	case 0xFF:
+		if ( pc == idle_addr + 1 )
+			goto stop;
+	case 0xC7: case 0xCF: case 0xD7: case 0xDF:  // RST
+	case 0xE7: case 0xEF: case 0xF7:
+		data = pc;
+		pc = (op & 0x38) + rst_base;
+		goto push;
+	
+	case 0xCC: // CZ
+		pc += 2;
+		if ( flags & z_flag )
+			goto call;
+		goto loop;
+	
+	case 0xD4: // CNC
+		pc += 2;
+		if ( !(flags & c_flag) )
+			goto call;
+		goto loop;
+	
+	case 0xDC: // CC
+		pc += 2;
+		if ( flags & c_flag )
+			goto call;
+		goto loop;
+
+	case 0xD9: // RETI
+		//interrupts_enabled = 1;
+		goto ret;
+	
+	case 0xC0: // RZ
+		if ( !(flags & z_flag) )
+			goto ret;
+		goto loop;
+	
+	case 0xD0: // RNC
+		if ( !(flags & c_flag) )
+			goto ret;
+		goto loop;
+	
+	case 0xD8: // RC
+		if ( flags & c_flag )
+			goto ret;
+		goto loop;
+
+	case 0x18: // JR
+		BRANCH( true )
+	
+	case 0x30: // JR NC
+		BRANCH( !(flags & c_flag) )
+	
+	case 0x38: // JR C
+		BRANCH( flags & c_flag )
+	
+	case 0xE9: // JP_HL
+		pc = rp.hl;
+		goto loop;
+
+	case 0xC3: // JP (next-most-common)
+		pc = GET_ADDR();
+		goto loop;
+	
+	case 0xC2: // JP NZ
+		pc += 2;
+		if ( !(flags & z_flag) )
+			goto jp_taken;
+		goto loop;
+	
+	case 0xCA: // JP Z (most common)
+		pc += 2;
+		if ( !(flags & z_flag) )
+			goto loop;
+	jp_taken:
+		pc -= 2;
+		pc = GET_ADDR();
+		goto loop;
+	
+	case 0xD2: // JP NC
+		pc += 2;
+		if ( !(flags & c_flag) )
+			goto jp_taken;
+		goto loop;
+	
+	case 0xDA: // JP C
+		pc += 2;
+		if ( flags & c_flag )
+			goto jp_taken;
+		goto loop;
+
+// Flags
+
+	case 0x2F: // CPL
+		rg.a = ~rg.a;
+		flags |= n_flag | h_flag;
+		goto loop;
+
+	case 0x3F: // CCF
+		flags = (flags ^ c_flag) & ~(n_flag | h_flag);
+		goto loop;
+
+	case 0x37: // SCF
+		flags = (flags | c_flag) & ~(n_flag | h_flag);
+		goto loop;
+
+	case 0xF3: // DI
+		//interrupts_enabled = 0;
+		goto loop;
+
+	case 0xFB: // EI
+		//interrupts_enabled = 1;
+		goto loop;
+
+// Special
+
+	case 0xDD: case 0xD3: case 0xDB: case 0xE3: case 0xE4: // ?
+	case 0xEB: case 0xEC: case 0xF4: case 0xFD: case 0xFC:
+	case 0x10: // STOP
+	case 0x27: // DAA (I'll have to implement this eventually...)
+	case 0xBF:
+	case 0xED: // Z80 prefix
+	case 0x76: // HALT
+		s.remain++;
+		goto stop;
+	}
+	
+	// If this fails then the case above is missing an opcode
+	assert( false );
+	
+stop:
+	pc--;
+	
+	// copy state back
+	STATIC_CAST(core_regs_t&,r) = rg;
+	r.pc = pc;
+	r.sp = sp;
+	r.flags = flags;
+	
+	this->state = &state_;
+	memcpy( &this->state_, &s, sizeof this->state_ );
+	
+	return s.remain > 0;
+}
diff --git a/libraries/game-music-emu/gme/Gb_Cpu.h b/libraries/game-music-emu/gme/Gb_Cpu.h
new file mode 100644
index 000000000..d3df30cac
--- /dev/null
+++ b/libraries/game-music-emu/gme/Gb_Cpu.h
@@ -0,0 +1,91 @@
+// Nintendo Game Boy CPU emulator
+// Treats every instruction as taking 4 cycles
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef GB_CPU_H
+#define GB_CPU_H
+
+#include "blargg_common.h"
+#include "blargg_endian.h"
+
+typedef unsigned gb_addr_t; // 16-bit CPU address
+
+class Gb_Cpu {
+	enum { clocks_per_instr = 4 }; // every instruction is charged 4 clocks (see remain())
+public:
+	// Clear registers and map all pages to unmapped
+	void reset( void* unmapped = 0 );
+	
+	// Map code memory (memory accessed via the program counter). Start and size
+	// must be multiples of page_size.
+	enum { page_size = 0x2000 };
+	void map_code( gb_addr_t start, unsigned size, void* code );
+	
+	uint8_t* get_code( gb_addr_t ); // pointer to the mapped memory behind a CPU address
+	
+	// Push a byte on the stack
+	void push_byte( int );
+	
+	// Game Boy Z80 registers. *Not* kept updated during a call to run().
+	struct core_regs_t { // member order depends on host endianness; do not reorder
+	#if BLARGG_BIG_ENDIAN
+		uint8_t b, c, d, e, h, l, flags, a;
+	#else
+		uint8_t c, b, e, d, l, h, a, flags;
+	#endif
+	};
+	
+	struct registers_t : core_regs_t {
+		long pc; // more than 16 bits to allow overflow detection
+		uint16_t sp;
+	};
+	registers_t r;
+	
+	// Interrupt enable flag set by EI and cleared by DI
+	//bool interrupts_enabled; // unused
+	
+	// Base address for RST vectors (normally 0)
+	gb_addr_t rst_base;
+	
+	// If the CPU executes opcode 0xFF at this address, it is treated as an illegal instruction
+	enum { idle_addr = 0xF00D };
+	
+	// Run CPU for at least 'count' cycles and return false, or return true if
+	// illegal instruction is encountered.
+	bool run( blargg_long count );
+	
+	// Number of clock cycles remaining for most recent run() call
+	blargg_long remain() const { return state->remain * clocks_per_instr; }
+	
+	// Can read this many bytes past end of a page
+	enum { cpu_padding = 8 };
+	
+public:
+	Gb_Cpu() : rst_base( 0 ) { state = &state_; } // start out pointing at the embedded state
+	enum { page_shift = 13 }; // 1 << 13 == page_size (0x2000)
+	enum { page_count = 0x10000 >> page_shift }; // 8 pages cover the 64K address space
+private:
+	// noncopyable
+	Gb_Cpu( const Gb_Cpu& );
+	Gb_Cpu& operator = ( const Gb_Cpu& );
+	
+	struct state_t {
+		uint8_t* code_map [page_count + 1]; // one entry per page plus one extra (NOTE(review): purpose of the extra slot is not visible here)
+		blargg_long remain; // counted in instructions; remain() scales it to clocks
+	};
+	state_t* state; // points to state_ or a local copy within run()
+	state_t state_;
+	
+	void set_code_page( int, uint8_t* );
+};
+
+inline uint8_t* Gb_Cpu::get_code( gb_addr_t addr )
+{
+	uint8_t* const page = state->code_map [addr >> page_shift]; // code_map entry for the page holding addr
+	#if !BLARGG_NONPORTABLE
+		addr %= (unsigned) page_size; // portable build: keep only the offset within the page
+	#endif
+	return page + addr;
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Gb_Oscs.cpp b/libraries/game-music-emu/gme/Gb_Oscs.cpp
new file mode 100644
index 000000000..735653fa9
--- /dev/null
+++ b/libraries/game-music-emu/gme/Gb_Oscs.cpp
@@ -0,0 +1,336 @@
+// Gb_Snd_Emu 0.1.5. http://www.slack.net/~ant/
+
+#include "Gb_Apu.h"
+
+#include <string.h>
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+// Gb_Osc
+
+void Gb_Osc::reset()
+{
+	// clear the running state first, then select the default output
+	length = 0;
+	last_amp = delay = 0;
+	output_select = 3; // index into outputs []
+	output = outputs [output_select];
+}
+
+void Gb_Osc::clock_length() // counts the remaining length down while counting is enabled
+{
+	bool const counting = (regs [4] & len_enabled_mask) != 0;
+	if ( counting && length != 0 ) --length;
+}
+
+// Gb_Env
+
+void Gb_Env::clock_envelope()
+{
+	if ( env_delay && !--env_delay ) // act only on the tick where the envelope timer expires
+	{
+		env_delay = regs [2] & 7; // reload the timer from the low 3 bits of regs [2]
+		int v = volume - 1 + (regs [2] >> 2 & 2); // bit 3 set: volume + 1, clear: volume - 1
+		if ( (unsigned) v < 16 ) // accept 0..15; 15 is a valid level (a trigger can load it from regs [2] >> 4)
+			volume = v;
+	}
+}
+
+bool Gb_Env::write_register( int reg, int data )
+{
+	if ( reg == 1 )
+	{
+		length = 64 - (regs [1] & 0x3F); // reload from the low 6 bits held in regs [1]
+		return false;
+	}
+	
+	if ( reg == 2 )
+	{
+		if ( (data >> 4) == 0 ) // a zero upper nibble switches the channel off
+			enabled = false;
+		return false;
+	}
+	
+	if ( reg != 4 || !(data & trigger) )
+		return false;
+	
+	// triggered: reload envelope timer and volume from regs [2], then restart
+	env_delay = regs [2] & 7;
+	volume = regs [2] >> 4;
+	enabled = true;
+	if ( length == 0 )
+		length = 64;
+	return true;
+}
+
+// Gb_Square
+
+void Gb_Square::reset()
+{
+	// reset the inherited envelope/oscillator state, then the square-only fields
+	Gb_Env::reset();
+	sweep_delay = sweep_freq = 0;
+	phase = 0;
+}
+
+void Gb_Square::clock_sweep()
+{
+	int const sweep_period = (regs [0] & period_mask) >> 4;
+	if ( !sweep_period || !sweep_delay || --sweep_delay )
+		return; // sweep off, idle, or its timer has not run out yet
+	sweep_delay = sweep_period;
+	
+	// publish the current sweep frequency through the frequency registers
+	regs [3] = sweep_freq & 0xFF;
+	regs [4] = (regs [4] & ~0x07) | (sweep_freq >> 8 & 0x07);
+	
+	// step by the shifted frequency; bit 3 of regs [0] makes the step negative
+	int const step = sweep_freq >> (regs [0] & shift_mask);
+	sweep_freq += (regs [0] & 0x08) ? -step : step;
+	
+	if ( sweep_freq >= 2048 )
+	{
+		sweep_freq = 2048; // silence sound immediately
+		sweep_delay = 0; // don't modify channel frequency any further
+	}
+	else if ( sweep_freq < 0 )
+	{
+		sweep_freq = 0;
+	}
+}
+
+void Gb_Square::run( blip_time_t time, blip_time_t end_time, int playing )
+{ // Emits this channel's amplitude transitions from 'time' up to 'end_time' into 'output'
+	if ( sweep_freq == 2048 ) // clock_sweep() parks sweep_freq at 2048 to silence the channel
+		playing = false;
+	
+	static unsigned char const table [4] = { 1, 2, 4, 6 }; // positive steps out of the 8 phase steps
+	int const duty = table [regs [1] >> 6]; // selected by the top two bits of regs [1]
+	int amp = volume & playing; // NOTE(review): assumes 'playing' is 0 or an all-ones mask — confirm at the call site
+	if ( phase >= duty )
+		amp = -amp;
+	
+	int frequency = this->frequency();
+	if ( unsigned (frequency - 1) > 2040 ) // frequency < 1 || frequency > 2041
+	{
+		// really high frequency results in DC at half volume
+		amp = volume >> 1;
+		playing = false;
+	}
+	
+	{
+		int delta = amp - last_amp; // step from the level last written to the buffer
+		if ( delta )
+		{
+			last_amp = amp;
+			synth->offset( time, delta, output );
+		}
+	}
+	
+	time += delay; // resume where the previous call left off
+	if ( !playing )
+		time = end_time; // nothing to generate; skip the loop below
+	
+	if ( time < end_time )
+	{
+		int const period = (2048 - frequency) * 4; // clocks per phase step
+		Blip_Buffer* const output = this->output; // local copies for the loop
+		int phase = this->phase;
+		int delta = amp * 2; // each transition swings between +amp and -amp
+		do
+		{
+			phase = (phase + 1) & 7; // 8 phase steps per cycle
+			if ( phase == 0 || phase == duty ) // the two edges of the square wave
+			{
+				delta = -delta;
+				synth->offset_inline( time, delta, output );
+			}
+			time += period;
+		}
+		while ( time < end_time );
+		
+		this->phase = phase;
+		last_amp = delta >> 1; // delta is +/- 2*amp, so half of it is the current level
+	}
+	delay = time - end_time; // overshoot past end_time, consumed by the next call
+}
+
+// Gb_Noise
+
+void Gb_Noise::run( blip_time_t time, blip_time_t end_time, int playing )
+{ // Emits this channel's amplitude transitions from 'time' up to 'end_time' into 'output'
+	int amp = volume & playing; // NOTE(review): assumes 'playing' is 0 or an all-ones mask — confirm at the call site
+	int tap = 13 - (regs [3] & 8); // 13 normally, 5 when bit 3 of regs [3] is set
+	if ( bits >> tap & 2 ) // bit (tap + 1) of the shift register selects the sign
+		amp = -amp;
+	
+	{
+		int delta = amp - last_amp; // step from the level last written to the buffer
+		if ( delta )
+		{
+			last_amp = amp;
+			synth->offset( time, delta, output );
+		}
+	}
+	
+	time += delay; // resume where the previous call left off
+	if ( !playing )
+		time = end_time; // nothing to generate; skip the loop below
+	
+	if ( time < end_time )
+	{
+		static unsigned char const table [8] = { 8, 16, 32, 48, 64, 80, 96, 112 }; // base periods
+		int period = table [regs [3] & 7] << (regs [3] >> 4); // base period scaled by the upper nibble
+		
+		// keep parallel resampled time to eliminate time conversion in the loop
+		Blip_Buffer* const output = this->output;
+		const blip_resampled_time_t resampled_period =
+				output->resampled_duration( period );
+		blip_resampled_time_t resampled_time = output->resampled_time( time );
+		unsigned bits = this->bits; // local copy of the shift register
+		int delta = amp * 2; // each transition swings between +amp and -amp
+		
+		do
+		{
+			unsigned changed = (bits >> tap) + 1; // bit 1 ends up set when bits tap and tap+1 differ
+			time += period;
+			bits <<= 1;
+			if ( changed & 2 )
+			{
+				delta = -delta;
+				bits |= 1; // feed a 1 back into the register
+				synth->offset_resampled( resampled_time, delta, output );
+			}
+			resampled_time += resampled_period;
+		}
+		while ( time < end_time );
+		
+		this->bits = bits;
+		last_amp = delta >> 1; // delta is +/- 2*amp, so half of it is the current level
+	}
+	delay = time - end_time; // overshoot past end_time, consumed by the next call
+}
+
+// Gb_Wave
+
+inline void Gb_Wave::write_register( int reg, int data )
+{
+	if ( reg == 0 )
+	{
+		if ( (data & 0x80) == 0 ) // clearing bit 7 switches the channel off
+			enabled = false;
+	}
+	else if ( reg == 1 )
+	{
+		length = 256 - regs [1];
+	}
+	else if ( reg == 2 )
+	{
+		volume = (data >> 5) & 3;
+	}
+	else if ( reg == 4 )
+	{
+		// a trigger only takes effect while bit 7 of regs [0] is also set
+		if ( data & trigger & regs [0] )
+		{
+			enabled = true;
+			wave_pos = 0;
+			if ( length == 0 )
+				length = 256;
+		}
+	}
+}
+
+void Gb_Wave::run( blip_time_t time, blip_time_t end_time, int playing )
+{ // Emits wave-table amplitude changes from 'time' up to 'end_time' into 'output'
+	int volume_shift = (volume - 1) & 7; // volume = 0 causes shift = 7
+	int frequency;
+	{
+		int amp = (wave [wave_pos] >> volume_shift & playing) * 2; // NOTE(review): assumes 'playing' is 0 or an all-ones mask — confirm at the call site
+		
+		frequency = this->frequency();
+		if ( unsigned (frequency - 1) > 2044 ) // frequency < 1 || frequency > 2045
+		{
+			amp = 30 >> volume_shift & playing; // constant level instead of stepping through the table
+			playing = false;
+		}
+		
+		int delta = amp - last_amp; // step from the level last written to the buffer
+		if ( delta )
+		{
+			last_amp = amp;
+			synth->offset( time, delta, output );
+		}
+	}
+	
+	time += delay; // resume where the previous call left off
+	if ( !playing )
+		time = end_time; // nothing to generate; skip the loop below
+	
+	if ( time < end_time )
+	{
+		Blip_Buffer* const output = this->output;
+		int const period = (2048 - frequency) * 2; // clocks per wave-table entry
+	 	int wave_pos = (this->wave_pos + 1) & (wave_size - 1); // local index runs one entry ahead of the stored one
+	 	
+		do
+		{
+			int amp = (wave [wave_pos] >> volume_shift) * 2;
+			wave_pos = (wave_pos + 1) & (wave_size - 1); // wrap within the table
+			int delta = amp - last_amp;
+			if ( delta )
+			{
+				last_amp = amp;
+				synth->offset_inline( time, delta, output );
+			}
+			time += period;
+		}
+		while ( time < end_time );
+		
+		this->wave_pos = (wave_pos - 1) & (wave_size - 1); // undo the one-entry lead before storing
+	}
+	delay = time - end_time; // overshoot past end_time, consumed by the next call
+}
+
+// Gb_Apu::write_osc
+
+void Gb_Apu::write_osc( int index, int reg, int data )
+{ // Forwards a register write to oscillator 'index' and applies its trigger side effects
+	reg -= index * 5; // each oscillator has 5 registers; make reg relative to its own group
+	Gb_Square* sq = &square2;
+	switch ( index )
+	{
+	case 0:
+		sq = &square1; // deliberate fall-through: both squares share the handling below
+	case 1:
+		if ( sq->write_register( reg, data ) && index == 0 ) // only square 1 gets the sweep handling
+		{
+			square1.sweep_freq = square1.frequency();
+			if ( (regs [0] & sq->period_mask) && (regs [0] & sq->shift_mask) )
+			{
+				square1.sweep_delay = 1; // cause sweep to recalculate now
+				square1.clock_sweep();
+			}
+		}
+		break;
+	
+	case 2:
+		wave.write_register( reg, data );
+		break;
+	
+	case 3:
+		if ( noise.write_register( reg, data ) ) // true means the channel was triggered
+			noise.bits = 0x7FFF; // restart the shift register with its low 15 bits set
+	}
+}
diff --git a/libraries/game-music-emu/gme/Gb_Oscs.h b/libraries/game-music-emu/gme/Gb_Oscs.h
new file mode 100644
index 000000000..8cb026c3e
--- /dev/null
+++ b/libraries/game-music-emu/gme/Gb_Oscs.h
@@ -0,0 +1,83 @@
+// Private oscillators used by Gb_Apu
+
+// Gb_Snd_Emu 0.1.5
+#ifndef GB_OSCS_H
+#define GB_OSCS_H
+
+#include "blargg_common.h"
+#include "Blip_Buffer.h"
+
+struct Gb_Osc
+{ // State shared by all four oscillators
+	enum { trigger = 0x80 }; // bit tested in register 4 writes to (re)start a channel
+	enum { len_enabled_mask = 0x40 }; // bit of regs [4] that lets clock_length() count down
+	
+	Blip_Buffer* outputs [4]; // NULL, right, left, center
+	Blip_Buffer* output; // currently selected entry of outputs []
+	int output_select; // index into outputs []
+	uint8_t* regs; // osc's 5 registers
+	
+	int delay; // overshoot carried from one run() call to the next
+	int last_amp; // level most recently written to the output buffer
+	int volume;
+	int length; // remaining length count; decremented by clock_length()
+	int enabled;
+	
+	void reset();
+	void clock_length();
+	int frequency() const { return (regs [4] & 7) * 0x100 + regs [3]; } // 11-bit value: low 3 bits of regs [4], then regs [3]
+};
+
+struct Gb_Env : Gb_Osc
+{ // Oscillator with a volume envelope
+	int env_delay; // envelope timer; 0 means the envelope is idle
+	
+	void reset();
+	void clock_envelope();
+	bool write_register( int, int ); // (reg, data); returns true when the write triggers the channel
+};
+
+struct Gb_Square : Gb_Env
+{ // Square-wave oscillator with frequency sweep
+	enum { period_mask = 0x70 }; // sweep period field of regs [0]
+	enum { shift_mask  = 0x07 }; // sweep shift field of regs [0]
+	
+	typedef Blip_Synth<blip_good_quality,1> Synth;
+	Synth const* synth;
+	int sweep_delay; // sweep timer; 0 means no sweep step is pending
+	int sweep_freq; // working frequency for the sweep; 2048 silences the channel
+	int phase; // current step within the 8-step duty cycle
+	
+	void reset();
+	void clock_sweep();
+	void run( blip_time_t, blip_time_t, int playing ); // (start time, end time, playing)
+};
+
+struct Gb_Noise : Gb_Env
+{ // Noise oscillator driven by a shift register
+	typedef Blip_Synth<blip_med_quality,1> Synth;
+	Synth const* synth;
+	unsigned bits; // shift register state; set to 0x7FFF when the channel is triggered
+	
+	void run( blip_time_t, blip_time_t, int playing ); // (start time, end time, playing)
+};
+
+struct Gb_Wave : Gb_Osc
+{ // Wave-table oscillator
+	typedef Blip_Synth<blip_med_quality,1> Synth;
+	Synth const* synth;
+	int wave_pos; // index of the current entry in wave []
+	enum { wave_size = 32 }; // must stay a power of two: run() wraps with & (wave_size - 1)
+	uint8_t wave [wave_size];
+	
+	void write_register( int, int ); // (reg, data)
+	void run( blip_time_t, blip_time_t, int playing ); // (start time, end time, playing)
+};
+
+inline void Gb_Env::reset()
+{
+	Gb_Osc::reset(); // common oscillator state
+	env_delay = 0; // envelope timer idle
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Gbs_Emu.cpp b/libraries/game-music-emu/gme/Gbs_Emu.cpp
new file mode 100644
index 000000000..6c5def339
--- /dev/null
+++ b/libraries/game-music-emu/gme/Gbs_Emu.cpp
@@ -0,0 +1,290 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Gbs_Emu.h"
+
+#include "blargg_endian.h"
+#include <string.h>
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+Gbs_Emu::equalizer_t const Gbs_Emu::handheld_eq   = // equalizer profile for the Game Boy Color speaker
+	Music_Emu::make_equalizer( -47.0, 2000 );
+Gbs_Emu::equalizer_t const Gbs_Emu::headphones_eq = // equalizer profile for headphones
+	Music_Emu::make_equalizer( 0.0, 300 );
+
+Gbs_Emu::Gbs_Emu()
+{ // Registers the GBS type, voice metadata and default playback settings
+	set_type( gme_gbs_type );
+	
+	static const char* const names [Gb_Apu::osc_count] = { // one display name per oscillator
+		"Square 1", "Square 2", "Wave", "Noise"
+	};
+	set_voice_names( names );
+	
+	static int const types [Gb_Apu::osc_count] = { // same order as names []
+		wave_type | 1, wave_type | 2, wave_type | 0, mixed_type | 0
+	};
+	set_voice_types( types );
+	
+	set_silence_lookahead( 6 );
+	set_max_initial_silence( 21 );
+	set_gain( 1.2 );
+	
+	set_equalizer( make_equalizer( -1.0, 120 ) ); // default profile; handheld_eq/headphones_eq are alternatives
+}
+
+Gbs_Emu::~Gbs_Emu() { } // no explicit teardown in the destructor body
+
+void Gbs_Emu::unload()
+{ // Drops the loaded ROM, then lets the base class clear its own state
+	rom.clear();
+	Music_Emu::unload();
+}
+
+// Track info
+
+static void copy_gbs_fields( Gbs_Emu::header_t const& h, track_info_t* out )
+{ // Copies the three text fields of a GBS header into the track info
+	GME_COPY_FIELD( h, out, game );
+	GME_COPY_FIELD( h, out, author );
+	GME_COPY_FIELD( h, out, copyright );
+}
+
+blargg_err_t Gbs_Emu::track_info_( track_info_t* out, int ) const
+{ // The track index is ignored: the header fields apply to every track
+	copy_gbs_fields( header_, out );
+	return 0;
+}
+
+static blargg_err_t check_gbs_header( void const* header )
+{
+	// every GBS file starts with the three-byte tag "GBS"
+	bool const tag_matches = (memcmp( header, "GBS", 3 ) == 0);
+	return tag_matches ? (blargg_err_t) 0 : gme_wrong_file_type;
+}
+
+struct Gbs_File : Gme_Info_
+{ // Info-only reader: loads just the GBS header, no emulation
+	Gbs_Emu::header_t h;
+	
+	Gbs_File() { set_type( gme_gbs_type ); }
+	
+	blargg_err_t load_( Data_Reader& in )
+	{
+		blargg_err_t err = in.read( &h, Gbs_Emu::header_size );
+		if ( err )
+			return (err == in.eof_error ? gme_wrong_file_type : err); // too short for a header: report as wrong type
+		
+		set_track_count( h.track_count );
+		return check_gbs_header( &h ); // tag is validated after the track count is recorded
+	}
+	
+	blargg_err_t track_info_( track_info_t* out, int ) const
+	{
+		copy_gbs_fields( h, out ); // same fields for every track
+		return 0;
+	}
+};
+
+static Music_Emu* new_gbs_emu () { return BLARGG_NEW Gbs_Emu ; } // factory for the full emulator
+static Music_Emu* new_gbs_file() { return BLARGG_NEW Gbs_File; } // factory for the info-only reader
+
+static gme_type_t_ const gme_gbs_type_ = { "Game Boy", 0, &new_gbs_emu, &new_gbs_file, "GBS", 1 }; // system, track_count, new_emu, new_info, extension_, flags_
+BLARGG_EXPORT extern gme_type_t const gme_gbs_type = &gme_gbs_type_;
+
+// Setup
+
+blargg_err_t Gbs_Emu::load_( Data_Reader& in )
+{ // Loads header and ROM, validates the header, and sets up voices and the buffer
+	assert( offsetof (header_t,copyright [32]) == header_size ); // struct layout must match the on-disk header size
+	RETURN_ERR( rom.load( in, header_size, &header_, 0 ) );
+	
+	set_track_count( header_.track_count );
+	RETURN_ERR( check_gbs_header( &header_ ) );
+	
+	if ( header_.vers != 1 ) // warning only; loading continues
+		set_warning( "Unknown file version" );
+	
+	if ( header_.timer_mode & 0x78 ) // bits 3-6 are expected to be clear
+		set_warning( "Invalid timer mode" );
+	
+	unsigned load_addr = get_le16( header_.load_addr );
+	if ( (header_.load_addr [1] | header_.init_addr [1] | header_.play_addr [1]) > 0x7F || // any high byte above 0x7F means an address of 0x8000 or more
+			load_addr < 0x400 )
+		set_warning( "Invalid load/init/play address" );
+	
+	set_voice_count( Gb_Apu::osc_count );
+	
+	apu.volume( gain() );
+	
+	return setup_buffer( 4194304 ); // NOTE(review): presumably the clock rate in Hz — confirm against setup_buffer()
+}
+
+void Gbs_Emu::update_eq( blip_eq_t const& eq )
+{ // Forwards the equalizer setting to the APU
+	apu.treble_eq( eq );
+}
+
+void Gbs_Emu::set_voice( int i, Blip_Buffer* c, Blip_Buffer* l, Blip_Buffer* r )
+{ // Forwards the output buffers for oscillator i to the APU
+	apu.osc_output( i, c, l, r );
+}
+
+// Emulation
+
+// see gb_cpu_io.h for read/write functions
+
+void Gbs_Emu::set_bank( int n )
+{ // Maps ROM bank n into the switchable window that starts at bank_size
+	blargg_long addr = rom.mask_addr( n * (blargg_long) bank_size );
+	if ( addr == 0 && rom.size() > bank_size ) // selecting bank 0 of a multi-bank ROM is ignored
+	{
+		// TODO: what is the correct behavior? Current Game & Watch Gallery
+		// rip requires that this have no effect or set to bank 1.
+		//debug_printf( "Selected ROM bank 0\n" );
+		return;
+		//n = 1;
+	}
+	cpu::map_code( bank_size, bank_size, rom.at_addr( addr ) );
+}
+
+void Gbs_Emu::update_timer()
+{
+	blip_time_t period = 70224; // 59.73 Hz
+	if ( header_.timer_mode & 0x04 )
+	{
+		// timer enabled: derive the period from the modulo and rate-select registers
+		static byte const rates [4] = { 10, 4, 6, 8 };
+		int const rate_shift = rates [ram [hi_page + 7] & 3] - (header_.timer_mode >> 7);
+		period = (256L - ram [hi_page + 6]) << rate_shift;
+	}
+	
+	if ( tempo() != 1.0 )
+		period = blip_time_t (period / tempo());
+	play_period = period;
+}
+
+static uint8_t const sound_data [Gb_Apu::register_count] = { // initial APU register values written by start_track_()
+	0x80, 0xBF, 0x00, 0x00, 0xBF, // square 1
+	0x00, 0x3F, 0x00, 0x00, 0xBF, // square 2
+	0x7F, 0xFF, 0x9F, 0x00, 0xBF, // wave
+	0x00, 0xFF, 0x00, 0x00, 0xBF, // noise
+	0x77, 0xF3, 0xF1, // vin/volume, status, power mode
+	0, 0, 0, 0, 0, 0, 0, 0, 0, // unused
+	0xAC, 0xDD, 0xDA, 0x48, 0x36, 0x02, 0xCF, 0x16, // waveform data
+	0x2C, 0x04, 0xE5, 0x2C, 0xAC, 0xDD, 0xDA, 0x48
+};
+
+void Gbs_Emu::cpu_jsr( gb_addr_t addr )
+{ // Sets up a call to addr whose return address is idle_addr
+	check( cpu::r.sp == get_le16( header_.stack_ptr ) ); // expects the stack to be back at its initial position
+	cpu::r.pc = addr;
+	cpu_write( --cpu::r.sp, idle_addr >> 8 ); // push high byte of the return address first
+	cpu_write( --cpu::r.sp, idle_addr&0xFF ); // then the low byte
+}
+
+void Gbs_Emu::set_tempo_( double t )
+{ // Applies the tempo to the APU and recomputes the play period
+	apu.set_tempo( t );
+	update_timer();
+}
+
+blargg_err_t Gbs_Emu::start_track_( int track )
+{ // Resets RAM, APU and CPU, maps memory, then sets up the call to the init routine
+	RETURN_ERR( Classic_Emu::start_track_( track ) );
+	
+	memset( ram, 0, 0x4000 ); // ram [] starts at ram_addr (0xA000): clears 0xA000-0xDFFF
+	memset( ram + 0x4000, 0xFF, 0x1F80 ); // 0xE000-0xFF7F read back as 0xFF
+	memset( ram + 0x5F80, 0, sizeof ram - 0x5F80 ); // 0xFF80 to the end of the array, padding included
+	ram [hi_page] = 0; // joypad reads back as 0
+	
+	apu.reset();
+	for ( int i = 0; i < (int) sizeof sound_data; i++ ) // load the initial APU register values
+		apu.write_register( 0, i + apu.start_addr, sound_data [i] );
+	
+	unsigned load_addr = get_le16( header_.load_addr );
+	rom.set_addr( load_addr );
+	cpu::rst_base = load_addr; // RST vectors are relative to the load address
+	
+	cpu::reset( rom.unmapped() );
+	
+	cpu::map_code( ram_addr, 0x10000 - ram_addr, ram ); // RAM covers ram_addr up to 0xFFFF
+	cpu::map_code( 0, bank_size, rom.at_addr( 0 ) ); // fixed bank at address 0
+	set_bank( rom.size() > bank_size ); // bank 1 when the ROM is larger than one bank, otherwise bank 0
+	
+	ram [hi_page + 6] = header_.timer_modulo; // these two bytes are read back by update_timer()
+	ram [hi_page + 7] = header_.timer_mode;
+	update_timer();
+	next_play = play_period;
+	
+	cpu::r.a  = track; // track number is handed over in register A
+	cpu::r.pc = idle_addr;
+	cpu::r.sp = get_le16( header_.stack_ptr );
+	cpu_time  = 0;
+	cpu_jsr( get_le16( header_.init_addr ) );
+	
+	return 0;
+}
+
+blargg_err_t Gbs_Emu::run_clocks( blip_time_t& duration, int )
+{ // Runs the CPU for 'duration' clocks, calling the play routine each play_period
+	cpu_time = 0;
+	while ( cpu_time < duration )
+	{
+		long count = duration - cpu_time;
+		cpu_time = duration; // set to the target first; clock() subtracts cpu::remain() while the CPU runs
+		bool result = cpu::run( count );
+		cpu_time -= cpu::remain(); // back off by whatever the CPU did not consume
+		
+		if ( result ) // CPU stopped early
+		{
+			if ( cpu::r.pc == idle_addr ) // the routine started by cpu_jsr() has returned
+			{
+				if ( next_play > duration ) // next play call falls outside this frame
+				{
+					cpu_time = duration;
+					break;
+				}
+				
+				if ( cpu_time < next_play ) // idle until the play call is due
+					cpu_time = next_play;
+				next_play += play_period;
+				cpu_jsr( get_le16( header_.play_addr ) );
+				GME_FRAME_HOOK( this );
+				// TODO: handle timer rates different than 60 Hz
+			}
+			else if ( cpu::r.pc > 0xFFFF )
+			{
+				debug_printf( "PC wrapped around\n" );
+				cpu::r.pc &= 0xFFFF;
+			}
+			else
+			{
+				set_warning( "Emulation error (illegal/unsupported instruction)" );
+				debug_printf( "Bad opcode $%.2x at $%.4x\n",
+						(int) *cpu::get_code( cpu::r.pc ), (int) cpu::r.pc );
+				cpu::r.pc = (cpu::r.pc + 1) & 0xFFFF; // skip the offending opcode
+				cpu_time += 6;
+			}
+		}
+	}
+	
+	duration = cpu_time; // report the time actually emulated
+	next_play -= cpu_time; // make next_play relative to the start of the next frame
+	if ( next_play < 0 ) // could go negative if routine is taking too long to return
+		next_play = 0;
+	apu.end_frame( cpu_time );
+	
+	return 0;
+}
diff --git a/libraries/game-music-emu/gme/Gbs_Emu.h b/libraries/game-music-emu/gme/Gbs_Emu.h
new file mode 100644
index 000000000..580f395c6
--- /dev/null
+++ b/libraries/game-music-emu/gme/Gbs_Emu.h
@@ -0,0 +1,88 @@
+// Nintendo Game Boy GBS music file emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef GBS_EMU_H
+#define GBS_EMU_H
+
+#include "Classic_Emu.h"
+#include "Gb_Apu.h"
+#include "Gb_Cpu.h"
+
+class Gbs_Emu : private Gb_Cpu, public Classic_Emu {
+	typedef Gb_Cpu cpu; // lets the implementation write cpu::member for the private base
+public:
+	// Equalizer profiles for Game Boy Color speaker and headphones
+	static equalizer_t const handheld_eq;
+	static equalizer_t const headphones_eq;
+	
+	// GBS file header
+	enum { header_size = 112 }; // load_() asserts that header_t ends exactly at this offset
+	struct header_t
+	{
+		char tag [3]; // must be "GBS"
+		byte vers; // load_() warns unless this is 1
+		byte track_count;
+		byte first_track;
+		byte load_addr [2]; // the four addresses are read with get_le16()
+		byte init_addr [2];
+		byte play_addr [2];
+		byte stack_ptr [2];
+		byte timer_modulo;
+		byte timer_mode;
+		char game [32];
+		char author [32];
+		char copyright [32];
+	};
+	
+	// Header for currently loaded file
+	header_t const& header() const { return header_; }
+	
+	static gme_type_t static_type() { return gme_gbs_type; }
+	
+public:
+	// deprecated
+	using Music_Emu::load;
+	blargg_err_t load( header_t const& h, Data_Reader& in ) // use Remaining_Reader
+			{ return load_remaining_( &h, sizeof h, in ); }
+
+public:
+	Gbs_Emu();
+	~Gbs_Emu();
+protected:
+	blargg_err_t track_info_( track_info_t*, int track ) const;
+	blargg_err_t load_( Data_Reader& );
+	blargg_err_t start_track_( int );
+	blargg_err_t run_clocks( blip_time_t&, int );
+	void set_tempo_( double );
+	void set_voice( int, Blip_Buffer*, Blip_Buffer*, Blip_Buffer* );
+	void update_eq( blip_eq_t const& );
+	void unload();
+private:
+	// rom
+	enum { bank_size = 0x4000 };
+	Rom_Data<bank_size> rom;
+	void set_bank( int );
+	
+	// timer
+	blip_time_t cpu_time; // clock position within the current run_clocks() call
+	blip_time_t play_period; // clocks between calls to the play routine
+	blip_time_t next_play; // time at which the next play call is due
+	void update_timer();
+	
+	header_t header_;
+	void cpu_jsr( gb_addr_t );
+	
+public: private: friend class Gb_Cpu;
+	blip_time_t clock() const { return cpu_time - cpu::remain(); } // current time while the CPU is running
+	
+	enum { joypad_addr = 0xFF00 };
+	enum { ram_addr = 0xA000 }; // CPU address that ram [0] is mapped at
+	enum { hi_page = 0xFF00 - ram_addr }; // offset within ram [] of address 0xFF00
+	byte ram [0x4000 + 0x2000 + Gb_Cpu::cpu_padding]; // 0xA000-0xFFFF plus read padding
+	Gb_Apu apu;
+	
+	int cpu_read( gb_addr_t );
+	void cpu_write( gb_addr_t, int );
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Gme_File.cpp b/libraries/game-music-emu/gme/Gme_File.cpp
new file mode 100644
index 000000000..a5e4516d6
--- /dev/null
+++ b/libraries/game-music-emu/gme/Gme_File.cpp
@@ -0,0 +1,216 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Gme_File.h"
+
+#include "blargg_endian.h"
+#include <string.h>
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+const char* const gme_wrong_file_type = "Wrong file type for this emulator"; // loaders return this exact pointer when a file's tag does not match
+
+void Gme_File::clear_playlist()
+{ // Removes any playlist and restores the file's own track count
+	playlist.clear();
+	clear_playlist_();
+	track_count_ = raw_track_count_;
+}
+
+void Gme_File::unload()
+{ // Returns the object to its empty, nothing-loaded state
+	clear_playlist(); // *before* clearing track count
+	track_count_     = 0;
+	raw_track_count_ = 0;
+	file_data.clear();
+}
+
+Gme_File::Gme_File()
+{ // Starts with no type, no user data and nothing loaded
+	type_         = 0;
+	user_data_    = 0;
+	user_cleanup_ = 0;
+	unload(); // clears fields
+	blargg_verify_byte_order(); // used by most emulator types, so save them the trouble
+}
+
+Gme_File::~Gme_File()
+{ // Gives the owner of user_data_ a chance to release it
+	if ( user_cleanup_ ) // callback is optional
+		user_cleanup_( user_data_ );
+}
+
+blargg_err_t Gme_File::load_mem_( byte const* data, long size )
+{ // Default in-memory load: wraps the data in a reader and defers to load_()
+	require( data != file_data.begin() ); // load_mem_() or load_() must be overridden
+	Mem_File_Reader in( data, size );
+	return load_( in );
+}
+
+blargg_err_t Gme_File::load_( Data_Reader& in )
+{ // Default reader load: reads everything into file_data and defers to load_mem_()
+	RETURN_ERR( file_data.resize( in.remain() ) );
+	RETURN_ERR( in.read( file_data.begin(), file_data.size() ) );
+	return load_mem_( file_data.begin(), file_data.size() );
+}
+
+// public load functions call this at beginning to discard anything already loaded
+void Gme_File::pre_load() { unload(); }
+
+void Gme_File::post_load_() { } // called by post_load() after a successful load; does nothing here
+
+// public load functions call this at end
+blargg_err_t Gme_File::post_load( blargg_err_t err )
+{
+	// when the loader set no track count, fall back to the type's fixed count
+	if ( track_count() == 0 )
+		set_track_count( type()->track_count );
+	if ( err )
+		unload(); // failed load: drop any partial state
+	else
+		post_load_();
+	return err;
+}
+
+// Public load functions
+
+blargg_err_t Gme_File::load_mem( void const* in, long size )
+{ // Public entry point: load from a block of memory
+	pre_load();
+	return post_load( load_mem_( (byte const*) in, size ) );
+}
+
+blargg_err_t Gme_File::load( Data_Reader& in )
+{ // Public entry point: load from a custom data source
+	pre_load();
+	return post_load( load_( in ) );
+}
+
+blargg_err_t Gme_File::load_file( const char* path )
+{ // Public entry point: load from a file on disk
+	pre_load();
+	GME_FILE_READER in;
+	RETURN_ERR( in.open( path ) ); // an open failure returns before post_load(); pre_load() has already unloaded
+	return post_load( load_( in ) );
+}
+
+blargg_err_t Gme_File::load_remaining_( void const* h, long s, Data_Reader& in )
+{ // Loads from an already-read header (h, s bytes) followed by the rest of 'in'
+	Remaining_Reader rem( h, s, &in );
+	return load( rem );
+}
+
+// Track info
+
+void Gme_File::copy_field_( char* out, const char* in, int in_size )
+{
+	// an absent or empty source leaves the destination exactly as it was,
+	// so an earlier value survives when the new one is blank
+	if ( in == 0 || *in == 0 )
+		return;
+	
+	// skip leading characters in the range 1..' ' (spaces and control codes)
+	for ( ; in_size && unsigned (*in - 1) <= ' ' - 1; in_size-- )
+		in++;
+	
+	// clamp to the longest field the output can hold
+	int const limit = (in_size > max_field_) ? max_field_ : in_size;
+	
+	// measure up to the terminator or the limit, whichever comes first
+	int len = 0;
+	while ( len < limit && in [len] )
+		len++;
+	
+	// drop trailing spaces and control codes
+	while ( len && unsigned (in [len - 1]) <= ' ' )
+		len--;
+	
+	// terminate first, then copy the kept characters
+	out [len] = 0;
+	memcpy( out, in, len );
+	
+	// placeholder values are treated the same as a blank field
+	bool const placeholder =
+			!strcmp( out, "?" ) || !strcmp( out, "<?>" ) || !strcmp( out, "< ? >" );
+	if ( placeholder )
+		out [0] = 0;
+}
+
+void Gme_File::copy_field_( char* out, const char* in )
+{ // Convenience overload for a terminated source: scans at most max_field_ characters
+	copy_field_( out, in, max_field_ );
+}
+
+blargg_err_t Gme_File::remap_track_( int* track_io ) const
+{ // Validates *track_io and, when a playlist is loaded, replaces it with the playlist's track
+	if ( (unsigned) *track_io >= (unsigned) track_count() ) // unsigned compare also rejects negatives
+		return "Invalid track";
+	
+	if ( (unsigned) *track_io < (unsigned) playlist.size() )
+	{
+		M3u_Playlist::entry_t const& e = playlist [*track_io];
+		*track_io = 0; // default when the entry names no track
+		if ( e.track >= 0 )
+		{
+			*track_io = e.track;
+			if ( !(type_->flags_ & 0x02) ) // types without flag 0x02 get the decimal_track adjustment
+				*track_io -= e.decimal_track;
+		}
+		if ( *track_io >= raw_track_count_ ) // note: *track_io has already been overwritten on this error path
+			return "Invalid track in m3u playlist";
+	}
+	else
+	{
+		check( !playlist.size() ); // a valid index beyond the playlist implies there is no playlist
+	}
+	return 0;
+}
+
+blargg_err_t Gme_File::track_info( track_info_t* out, int track ) const
+{ // Fills *out for 'track': defaults first, then format-specific info, then m3u overrides
+	out->track_count = track_count();
+	out->length        = -1; // -1 means unknown
+	out->loop_length   = -1;
+	out->intro_length  = -1;
+	out->song [0]      = 0; // text fields start out empty
+	
+	out->game [0]      = 0;
+	out->author [0]    = 0;
+	out->copyright [0] = 0;
+	out->comment [0]   = 0;
+	out->dumper [0]    = 0;
+	out->system [0]    = 0;
+	
+	copy_field_( out->system, type()->system );
+	
+	int remapped = track; // playlist position translated to the file's own track index
+	RETURN_ERR( remap_track_( &remapped ) );
+	RETURN_ERR( track_info_( out, remapped ) );
+	
+	// override with m3u info
+	if ( playlist.size() )
+	{
+		M3u_Playlist::info_t const& i = playlist.info();
+		copy_field_( out->game  , i.title );
+		copy_field_( out->author, i.engineer );
+		copy_field_( out->author, i.composer ); // a non-empty composer replaces the engineer
+		copy_field_( out->dumper, i.ripping );
+		
+		M3u_Playlist::entry_t const& e = playlist [track]; // indexed by playlist position, not the remapped track
+		copy_field_( out->song, e.name );
+		if ( e.length >= 0 ) out->length       = e.length * 1000L; // scaled by 1000 to the milliseconds used by track_info_t
+		if ( e.intro  >= 0 ) out->intro_length = e.intro  * 1000L;
+		if ( e.loop   >= 0 ) out->loop_length  = e.loop   * 1000L;
+	}
+	return 0;
+}
diff --git a/libraries/game-music-emu/gme/Gme_File.h b/libraries/game-music-emu/gme/Gme_File.h
new file mode 100644
index 000000000..3ec36bc8e
--- /dev/null
+++ b/libraries/game-music-emu/gme/Gme_File.h
@@ -0,0 +1,173 @@
+// Common interface to game music file loading and information
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef GME_FILE_H
+#define GME_FILE_H
+
+#include "gme.h"
+#include "blargg_common.h"
+#include "Data_Reader.h"
+#include "M3u_Playlist.h"
+
+// Error returned if file is wrong type
+//extern const char gme_wrong_file_type []; // declared in gme.h
+
+struct gme_type_t_ /* static description of one music file format */
+{
+	const char* system;         /* name of system this music file type is generally for */
+	int track_count;            /* non-zero for formats with a fixed number of tracks */
+	Music_Emu* (*new_emu)();    /* Create new emulator for this type (useful in C++ only) */
+	Music_Emu* (*new_info)();   /* Create new info reader for this type */
+	
+	/* internal */
+	const char* extension_;
+	int flags_; /* bit 0x02 is tested by Gme_File::remap_track_() */
+};
+
+struct track_info_t /* filled in by Gme_File::track_info() */
+{
+	long track_count;
+	
+	/* times in milliseconds; -1 if unknown */
+	long length;
+	long intro_length;
+	long loop_length;
+	
+	/* empty string if not available */
+	char system    [256];
+	char game      [256];
+	char song      [256];
+	char author    [256];
+	char copyright [256];
+	char comment   [256];
+	char dumper    [256];
+};
+enum { gme_max_field = 255 }; /* one less than the 256-byte text fields above */
+
+struct Gme_File {
+public:
+// File loading
+	
+	// Each loads game music data from a file and returns an error if
+	// file is wrong type or is seriously corrupt. They also set warning
+	// string for minor problems.
+	
+	// Load from file on disk
+	blargg_err_t load_file( const char* path );
+	
+	// Load from custom data source (see Data_Reader.h)
+	blargg_err_t load( Data_Reader& );
+	
+	// Load from file already read into memory. Keeps pointer to data, so you
+	// must not free it until you're done with the file.
+	blargg_err_t load_mem( void const* data, long size );
+	
+	// Load an m3u playlist. Must be done after loading main music file.
+	blargg_err_t load_m3u( const char* path );
+	blargg_err_t load_m3u( Data_Reader& in );
+	
+	// Clears any loaded m3u playlist and any internal playlist that the music
+	// format supports (NSFE for example).
+	void clear_playlist();
+	
+// Informational
+	
+	// Type of emulator. For example if this returns gme_nsfe_type, this object
+	// is an NSFE emulator, and you can cast to an Nsfe_Emu* if necessary.
+	gme_type_t type() const;
+	
+	// Most recent warning string, or NULL if none. Clears current warning after
+	// returning.
+	const char* warning();
+	
+	// Number of tracks or 0 if no file has been loaded
+	int track_count() const;
+	
+	// Get information for a track (length, name, author, etc.)
+	// struct track_info_t is defined earlier in this header.
+	blargg_err_t track_info( track_info_t* out, int track ) const;
+	
+// User data/cleanup
+	
+	// Set/get pointer to data you want to associate with this emulator.
+	// You can use this for whatever you want.
+	void set_user_data( void* p )       { user_data_ = p; }
+	void* user_data() const             { return user_data_; }
+	
+	// Register cleanup function to be called when deleting emulator, or NULL to
+	// clear it. Passes user_data to cleanup function.
+	void set_user_cleanup( gme_user_cleanup_t func ) { user_cleanup_ = func; }
+	
+public:
+	// deprecated
+	int error_count() const; // use warning()
+public:
+	Gme_File();
+	virtual ~Gme_File();
+	BLARGG_DISABLE_NOTHROW
+	typedef uint8_t byte;
+protected:
+	// Services
+	void set_track_count( int n )       { track_count_ = raw_track_count_ = n; } // sets both the visible and the raw count
+	void set_warning( const char* s )   { warning_ = s; }
+	void set_type( gme_type_t t )       { type_ = t; }
+	blargg_err_t load_remaining_( void const* header, long header_size, Data_Reader& remaining );
+	
+	// Overridable
+	virtual void unload();  // called before loading file and if loading fails
+	virtual blargg_err_t load_( Data_Reader& ); // default loads then calls load_mem_()
+	virtual blargg_err_t load_mem_( byte const* data, long size ); // use data in memory
+	virtual blargg_err_t track_info_( track_info_t* out, int track ) const = 0; // receives the remapped (raw) track index
+	virtual void pre_load();
+	virtual void post_load_();
+	virtual void clear_playlist_() { }
+	
+public:
+	blargg_err_t remap_track_( int* track_io ) const; // needed by Music_Emu
+private:
+	// noncopyable
+	Gme_File( const Gme_File& );
+	Gme_File& operator = ( const Gme_File& );
+	
+	gme_type_t type_;
+	int track_count_; // value returned by track_count()
+	int raw_track_count_; // upper bound for tracks named by playlist entries (see remap_track_)
+	const char* warning_;
+	void* user_data_;
+	gme_user_cleanup_t user_cleanup_;
+	M3u_Playlist playlist;
+	char playlist_warning [64];
+	blargg_vector<byte> file_data; // only if loaded into memory using default load
+	
+	blargg_err_t load_m3u_( blargg_err_t );
+	blargg_err_t post_load( blargg_err_t err );
+public:
+	// track_info field copying
+	enum { max_field_ = 255 };
+	static void copy_field_( char* out, const char* in ); // uses max_field_ as the length
+	static void copy_field_( char* out, const char* in, int len );
+};
+	
+Music_Emu* gme_new_( Music_Emu*, long sample_rate );
+
+#define GME_COPY_FIELD( in, out, name ) \
+	{ Gme_File::copy_field_( out->name, in.name, sizeof in.name ); } /* in is an object, out a pointer; length is the size of the source field */
+
+#ifndef GME_FILE_READER
+	#define GME_FILE_READER Std_File_Reader
+#elif defined (GME_FILE_READER_INCLUDE)
+	#include GME_FILE_READER_INCLUDE
+#endif
+
+inline gme_type_t Gme_File::type() const            { return type_; }
+inline int Gme_File::error_count() const            { return warning_ != 0; } // 1 if a warning is pending, else 0
+inline int Gme_File::track_count() const            { return track_count_; }
+
+inline const char* Gme_File::warning() // returns the pending warning and clears it
+{
+	const char* s = warning_;
+	warning_ = 0; // a second call returns NULL until a new warning is set
+	return s;
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Gym_Emu.cpp b/libraries/game-music-emu/gme/Gym_Emu.cpp
new file mode 100644
index 000000000..bb99ff033
--- /dev/null
+++ b/libraries/game-music-emu/gme/Gym_Emu.cpp
@@ -0,0 +1,380 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Gym_Emu.h"
+
+#include "blargg_endian.h"
+#include <string.h>
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+double const min_tempo = 0.25; // lowest tempo accepted by set_tempo_(); also sizes buffers in set_sample_rate_()
+double const oversample_factor = 5 / 3.0; // passed to Dual_Resampler::setup()
+double const fm_gain = 3.0; // scales the apu, dac_synth and resampler gains in set_sample_rate_()
+
+const long base_clock = 53700300; // fm is set up with base_clock / 7.0
+const long clock_rate = base_clock / 15; // clock rate given to blip_buf
+
+Gym_Emu::Gym_Emu() // starts with no sequence data and registers type and voice names
+{
+	data = 0;
+	pos  = 0;
+	set_type( gme_gym_type );
+	
+	static const char* const names [] = { // eight names, matching set_voice_count( 8 ) in load_mem_()
+		"FM 1", "FM 2", "FM 3", "FM 4", "FM 5", "FM 6", "PCM", "PSG"
+	};
+	set_voice_names( names );
+	set_silence_lookahead( 1 ); // tracks should already be trimmed
+}
+
+Gym_Emu::~Gym_Emu() { } // nothing to release here; data points into memory passed to load_mem_()
+
+// Track info
+
+static void get_gym_info( Gym_Emu::header_t const& h, long length, track_info_t* out ) // length is in frames; only fills out when h has a GYMX tag
+{
+	if ( !memcmp( h.tag, "GYMX", 4 ) )
+	{
+		length = length * 50 / 3; // 1000 / 60
+		long loop = get_le32( h.loop_start );
+		if ( loop )
+		{
+			out->intro_length = loop * 50 / 3; // same frames-to-milliseconds conversion as above
+			out->loop_length  = length - out->intro_length; // out->length is left untouched for looped tracks
+		}
+		else
+		{
+			out->length = length;
+			out->intro_length = length; // make it clear that track is no longer than length
+			out->loop_length = 0;
+		}
+		
+		// skip known placeholder strings instead of copying them; such fields are left empty
+		if ( strcmp( h.song, "Unknown Song" ) )
+			GME_COPY_FIELD( h, out, song );
+		
+		if ( strcmp( h.game, "Unknown Game" ) )
+			GME_COPY_FIELD( h, out, game );
+		
+		if ( strcmp( h.copyright, "Unknown Publisher" ) )
+			GME_COPY_FIELD( h, out, copyright );
+		
+		if ( strcmp( h.dumper, "Unknown Person" ) )
+			GME_COPY_FIELD( h, out, dumper );
+		
+		if ( strcmp( h.comment, "Header added by YMAMP" ) )
+			GME_COPY_FIELD( h, out, comment );
+	}
+}
+
+blargg_err_t Gym_Emu::track_info_( track_info_t* out, int ) const // track index is ignored
+{
+	get_gym_info( header_, track_length(), out ); // header_ is all zero for headerless files, so nothing is filled then
+	return 0;
+}
+
+static long gym_track_length( byte const* p, byte const* end ) // counts command-0 bytes (one per frame) in [p, end)
+{
+	long time = 0;
+	while ( p < end )
+	{
+		switch ( *p++ )
+		{
+			case 0: // end of frame
+				time++;
+				break;
+			
+			case 1: // commands 1 and 2 carry two operand bytes
+			case 2:
+				p += 2;
+				break;
+			
+			case 3: // command 3 carries one operand byte
+				p += 1;
+				break;
+		} // any other byte is skipped on its own
+	}
+	return time;
+}
+
+long Gym_Emu::track_length() const { return gym_track_length( data, data_end ); } // length in frames, not milliseconds
+
+static blargg_err_t check_header( byte const* in, long size, int* data_offset = 0 ) // returns 0 if in looks like GYM data; *data_offset is only written for GYMX files
+{
+	if ( size < 4 )
+		return gme_wrong_file_type;
+	
+	if ( memcmp( in, "GYMX", 4 ) == 0 )
+	{
+		if ( size < Gym_Emu::header_size + 1 ) // need the full header plus at least one data byte
+			return gme_wrong_file_type;
+		
+		if ( memcmp( ((Gym_Emu::header_t const*) in)->packed, "\0\0\0\0", 4 ) != 0 )
+			return "Packed GYM file not supported";
+		
+		if ( data_offset )
+			*data_offset = Gym_Emu::header_size;
+	}
+	else if ( *in > 3 ) // headerless file: first byte must be a command in 0..3
+	{
+		return gme_wrong_file_type;
+	}
+	
+	return 0;
+}
+
+struct Gym_File : Gme_Info_ // info-only reader for GYM files (created by new_gym_file())
+{
+	byte const* file_begin;
+	byte const* file_end;
+	int data_offset; // header_size for GYMX files, 0 for headerless files
+	
+	Gym_File() { set_type( gme_gym_type ); }
+	
+	blargg_err_t load_mem_( byte const* in, long size ) // keeps pointers into in; does not copy
+	{
+		file_begin = in;
+		file_end   = in + size;
+		data_offset = 0;
+		return check_header( in, size, &data_offset );
+	}
+	
+	blargg_err_t track_info_( track_info_t* out, int ) const
+	{
+		long length = gym_track_length( &file_begin [data_offset], file_end );
+		get_gym_info( *(Gym_Emu::header_t const*) file_begin, length, out ); // get_gym_info reads past the 4-byte tag only when it is "GYMX"
+		return 0;
+	}
+};
+
+static Music_Emu* new_gym_emu () { return BLARGG_NEW Gym_Emu ; } // factory for the full emulator
+static Music_Emu* new_gym_file() { return BLARGG_NEW Gym_File; } // factory for the info-only reader
+
+static gme_type_t_ const gme_gym_type_ = { "Sega Genesis", 1, &new_gym_emu, &new_gym_file, "GYM", 0 }; // system, track_count, new_emu, new_info, extension_, flags_
+BLARGG_EXPORT extern gme_type_t const gme_gym_type = &gme_gym_type_;
+
+// Setup
+
+blargg_err_t Gym_Emu::set_sample_rate_( long sample_rate ) // configures equalization, volumes, buffers and the FM chip for sample_rate
+{
+	blip_eq_t eq( -32, 8000, sample_rate );
+	apu.treble_eq( eq );
+	dac_synth.treble_eq( eq );
+	apu.volume( 0.135 * fm_gain * gain() );
+	dac_synth.volume( 0.125 / 256 * fm_gain * gain() );
+	double factor = Dual_Resampler::setup( oversample_factor, 0.990, fm_gain * gain() );
+	fm_sample_rate = sample_rate * factor; // FM chip runs at the resampler's input rate
+	
+	RETURN_ERR( blip_buf.set_sample_rate( sample_rate, int (1000 / 60.0 / min_tempo) ) ); // second argument is based on one frame at min_tempo
+	blip_buf.clock_rate( clock_rate );
+	
+	RETURN_ERR( fm.set_rate( fm_sample_rate, base_clock / 7.0 ) );
+	RETURN_ERR( Dual_Resampler::reset( long (1.0 / 60 / min_tempo * sample_rate) ) ); // samples in one frame at min_tempo
+	
+	return 0;
+}
+
+void Gym_Emu::set_tempo_( double t ) // applies a tempo change, clamping to min_tempo
+{
+	if ( t < min_tempo )
+	{
+		set_tempo( min_tempo ); // NOTE(review): presumably re-enters set_tempo_() with the clamped value - confirm in Music_Emu
+		return;
+	}
+	
+	if ( blip_buf.sample_rate() ) // skipped until a sample rate has been set
+	{
+		clocks_per_frame = long (clock_rate / 60 / tempo());
+		Dual_Resampler::resize( long (sample_rate() / (60.0 * tempo())) );
+	}
+}
+
+void Gym_Emu::mute_voices_( int mask ) // NOTE(review): bit n presumably corresponds to entry n of the voice names array - confirm
+{
+	Music_Emu::mute_voices_( mask );
+	fm.mute_voices( mask );
+	dac_muted = (mask & 0x40) != 0; // bit 6
+	apu.output( (mask & 0x80) ? 0 : &blip_buf ); // bit 7: a null output is passed to apu when set
+}
+
+blargg_err_t Gym_Emu::load_mem_( byte const* in, long size ) // validates the data and records pointers into it (no copy)
+{
+	assert( offsetof (header_t,packed [4]) == header_size );
+	int offset = 0;
+	RETURN_ERR( check_header( in, size, &offset ) ); // offset stays 0 for headerless files
+	set_voice_count( 8 );
+	
+	data     = in + offset;
+	data_end = in + size;
+	loop_begin = 0;
+	
+	if ( offset )
+		header_ = *(header_t const*) in;
+	else
+		memset( &header_, 0, sizeof header_ ); // no header in file: use an all-zero one
+	
+	return 0;
+}
+
+// Emulation
+
+blargg_err_t Gym_Emu::start_track_( int track ) // rewinds to the start of the sequence and resets sound state
+{
+	RETURN_ERR( Music_Emu::start_track_( track ) );
+	
+	pos         = data;
+	loop_remain = get_le32( header_.loop_start ); // counted down in parse_frame() to locate loop_begin
+	
+	prev_dac_count = 0;
+	dac_enabled    = false;
+	dac_amp        = -1; // negative means no previous amplitude; run_dac() then starts from the first sample
+	
+	fm.reset();
+	apu.reset();
+	blip_buf.clear();
+	Dual_Resampler::clear();
+	return 0;
+}
+
+void Gym_Emu::run_dac( int dac_count ) // outputs the dac_count samples in dac_buf; called only with dac_count != 0
+{
+	// Guess beginning and end of sample and adjust rate and buffer position accordingly.
+	
+	// count dac samples in next frame
+	int next_dac_count = 0;
+	const byte* p = this->pos;
+	int cmd;
+	while ( (cmd = *p++) != 0 ) // NOTE(review): not bounded by data_end; relies on a terminating 0 byte
+	{
+		int data = *p++;
+		if ( cmd <= 2 )
+			++p; // commands 1 and 2 have a second operand byte
+		if ( cmd == 1 && data == 0x2A )
+			next_dac_count++;
+	}
+	
+	// detect beginning and end of sample
+	int rate_count = dac_count;
+	int start = 0;
+	if ( !prev_dac_count && next_dac_count && dac_count < next_dac_count )
+	{
+		rate_count = next_dac_count; // sample begins this frame: use next frame's rate and place samples at the end
+		start = next_dac_count - dac_count;
+	}
+	else if ( prev_dac_count && !next_dac_count && dac_count < prev_dac_count )
+	{
+		rate_count = prev_dac_count; // sample ends this frame: keep previous frame's rate
+	}
+	
+	// Evenly space samples within buffer section being used
+	blip_resampled_time_t period = blip_buf.resampled_duration( clocks_per_frame ) / rate_count;
+	
+	blip_resampled_time_t time = blip_buf.resampled_time( 0 ) +
+			period * start + (period >> 1);
+	
+	int dac_amp = this->dac_amp;
+	if ( dac_amp < 0 )
+		dac_amp = dac_buf [0]; // no previous amplitude: first delta becomes zero
+	
+	for ( int i = 0; i < dac_count; i++ )
+	{
+		int delta = dac_buf [i] - dac_amp;
+		dac_amp += delta;
+		dac_synth.offset_resampled( time, delta, &blip_buf );
+		time += period;
+	}
+	this->dac_amp = dac_amp;
+}
+
+void Gym_Emu::parse_frame() // executes the commands of one frame, up to and including its terminating 0 byte
+{
+	int dac_count = 0;
+	const byte* pos = this->pos;
+	
+	if ( loop_remain && !--loop_remain )
+		loop_begin = pos; // find loop on first time through sequence
+	
+	int cmd;
+	while ( (cmd = *pos++) != 0 ) // NOTE(review): not bounded by data_end; relies on a terminating 0 byte
+	{
+		int data = *pos++;
+		if ( cmd == 1 )
+		{
+			int data2 = *pos++;
+			if ( data != 0x2A )
+			{
+				if ( data == 0x2B )
+					dac_enabled = (data2 & 0x80) != 0;
+				
+				fm.write0( data, data2 );
+			}
+			else if ( dac_count < (int) sizeof dac_buf ) // 0x2A writes are collected as DAC samples; extras are dropped once dac_buf is full
+			{
+				dac_buf [dac_count] = data2;
+				dac_count += dac_enabled; // sample is kept only while the DAC is enabled
+			}
+		}
+		else if ( cmd == 2 )
+		{
+			fm.write1( data, *pos++ );
+		}
+		else if ( cmd == 3 )
+		{
+			apu.write_data( 0, data );
+		}
+		else
+		{
+			// to do: many GYM streams are full of errors, and error count should
+			// reflect cases where music is really having problems
+			//log_error(); 
+			--pos; // put data back
+		}
+	}
+	
+	// loop
+	if ( pos >= data_end )
+	{
+		check( pos == data_end );
+		
+		if ( loop_begin )
+			pos = loop_begin;
+		else
+			set_track_ended();
+	}
+	this->pos = pos;
+	
+	// dac
+	if ( dac_count && !dac_muted )
+		run_dac( dac_count );
+	prev_dac_count = dac_count; // remembered for run_dac()'s start/end detection in the next frame
+}
+
+int Gym_Emu::play_frame( blip_time_t blip_time, int sample_count, sample_t* buf ) // runs one frame and writes FM output to buf; returns sample_count
+{
+	if ( !track_ended() )
+		parse_frame();
+	
+	apu.end_frame( blip_time );
+	
+	memset( buf, 0, sample_count * sizeof *buf ); // buf is cleared before fm.run()
+	fm.run( sample_count >> 1, buf ); // NOTE(review): halving suggests sample_count counts both stereo channels - confirm
+	
+	return sample_count;
+}
+
+blargg_err_t Gym_Emu::play_( long count, sample_t* out ) // delegates to the resampler; always returns 0
+{
+	Dual_Resampler::dual_play( count, out, blip_buf );
+	return 0;
+}
diff --git a/libraries/game-music-emu/gme/Gym_Emu.h b/libraries/game-music-emu/gme/Gym_Emu.h
new file mode 100644
index 000000000..290f57f5c
--- /dev/null
+++ b/libraries/game-music-emu/gme/Gym_Emu.h
@@ -0,0 +1,82 @@
+// Sega Genesis/Mega Drive GYM music file emulator
+// Includes PCM timing recovery to improve sample quality.
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef GYM_EMU_H
+#define GYM_EMU_H
+
+#include "Dual_Resampler.h"
+#include "Ym2612_Emu.h"
+#include "Music_Emu.h"
+#include "Sms_Apu.h"
+
+class Gym_Emu : public Music_Emu, private Dual_Resampler {
+public:
+	// GYM file header
+	enum { header_size = 428 }; // equals offsetof (header_t,packed [4]); asserted in load_mem_()
+	struct header_t
+	{
+	    char tag [4]; // "GYMX" when the header is present
+	    char song [32];
+	    char game [32];
+	    char copyright [32];
+	    char emulator [32];
+	    char dumper [32];
+	    char comment [256];
+	    byte loop_start [4]; // in 1/60 seconds, 0 if not looped
+	    byte packed [4]; // must be all zero; packed files are rejected
+	};
+	
+	// Header for currently loaded file
+	header_t const& header() const { return header_; }
+	
+	static gme_type_t static_type() { return gme_gym_type; }
+	
+public:
+	// deprecated
+	using Music_Emu::load;
+	blargg_err_t load( header_t const& h, Data_Reader& in ) // use Remaining_Reader
+			{ return load_remaining_( &h, sizeof h, in ); }
+	enum { gym_rate = 60 }; 
+	long track_length() const; // use track_info()
+
+public:
+	Gym_Emu();
+	~Gym_Emu();
+protected:
+	blargg_err_t load_mem_( byte const*, long );
+	blargg_err_t track_info_( track_info_t*, int track ) const;
+	blargg_err_t set_sample_rate_( long sample_rate );
+	blargg_err_t start_track_( int );
+	blargg_err_t play_( long count, sample_t* );
+	void mute_voices_( int );
+	void set_tempo_( double );
+	int play_frame( blip_time_t blip_time, int sample_count, sample_t* buf );
+private:
+	// sequence data begin, loop begin, current position, end
+	const byte* data;
+	const byte* loop_begin; // null until the loop point has been found
+	const byte* pos;
+	const byte* data_end;
+	blargg_long loop_remain; // frames remaining until loop beginning has been located
+	header_t header_; // zero-filled when the file has no GYMX header
+	double fm_sample_rate;
+	blargg_long clocks_per_frame;
+	void parse_frame();
+	
+	// dac (pcm)
+	int dac_amp; // last output amplitude, or negative if none yet
+	int prev_dac_count; // number of DAC samples in the previous frame
+	bool dac_enabled;
+	bool dac_muted;
+	void run_dac( int );
+	
+	// sound
+	Blip_Buffer blip_buf;
+	Ym2612_Emu fm;
+	Blip_Synth<blip_med_quality,1> dac_synth;
+	Sms_Apu apu;
+	byte dac_buf [1024]; // DAC samples collected by parse_frame() for the current frame
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Hes_Apu.cpp b/libraries/game-music-emu/gme/Hes_Apu.cpp
new file mode 100644
index 000000000..1df811592
--- /dev/null
+++ b/libraries/game-music-emu/gme/Hes_Apu.cpp
@@ -0,0 +1,315 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Hes_Apu.h"
+
+#include <string.h>
+
+/* Copyright (C) 2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+bool const center_waves = true; // reduces asymmetry and clamping when starting notes
+
+Hes_Apu::Hes_Apu() // clears every oscillator's buffer pointers, then resets
+{
+	Hes_Osc* osc = &oscs [osc_count];
+	do // walks the oscillators from last to first
+	{
+		osc--;
+		osc->outputs [0] = 0; // reset() does not touch outputs or chans, so clear them here
+		osc->outputs [1] = 0;
+		osc->chans [0] = 0;
+		osc->chans [1] = 0;
+		osc->chans [2] = 0;
+	}
+	while ( osc != oscs );
+	
+	reset();
+}
+
+void Hes_Apu::reset() // restores register state; output buffer assignments are kept
+{
+	latch   = 0;
+	balance = 0xFF;
+	
+	Hes_Osc* osc = &oscs [osc_count];
+	do
+	{
+		osc--;
+		memset( osc, 0, offsetof (Hes_Osc,outputs) ); // zeroes only the members declared before outputs
+		osc->noise_lfsr = 1; // run_until() asserts the LFSR is non-zero
+		osc->control    = 0x40;
+		osc->balance    = 0xFF;
+	}
+	while ( osc != oscs );
+}
+
+void Hes_Apu::osc_output( int index, Blip_Buffer* center, Blip_Buffer* left, Blip_Buffer* right ) // assigns the three buffers of one oscillator
+{
+	require( (unsigned) index < osc_count );
+	oscs [index].chans [0] = center;
+	oscs [index].chans [1] = left;
+	oscs [index].chans [2] = right;
+	
+	Hes_Osc* osc = &oscs [osc_count];
+	do
+	{
+		osc--;
+		balance_changed( *osc ); // re-derives outputs[] from chans[] for every oscillator
+	}
+	while ( osc != oscs );
+}
+
+void Hes_Osc::run_until( synth_t& synth_, blip_time_t end_time ) // generates output from last_time up to end_time
+{
+	Blip_Buffer* const osc_outputs_0 = outputs [0]; // cache often-used values
+	if ( osc_outputs_0 && control & 0x80 ) // nothing is generated without a first output or with control bit 7 clear
+	{
+		int dac = this->dac;
+		
+		int const volume_0 = volume [0];
+		{
+			int delta = dac * volume_0 - last_amp [0]; // bring the buffer to the current amplitude first
+			if ( delta )
+				synth_.offset( last_time, delta, osc_outputs_0 );
+			osc_outputs_0->set_modified();
+		}
+		
+		Blip_Buffer* const osc_outputs_1 = outputs [1]; // null when balance_changed() selected the center buffer only
+		int const volume_1 = volume [1];
+		if ( osc_outputs_1 )
+		{
+			int delta = dac * volume_1 - last_amp [1];
+			if ( delta )
+				synth_.offset( last_time, delta, osc_outputs_1 );
+			osc_outputs_1->set_modified();
+		}
+		
+		blip_time_t time = last_time + delay;
+		if ( time < end_time )
+		{
+			if ( noise & 0x80 )
+			{
+				if ( volume_0 | volume_1 )
+				{
+					// noise
+					int const period = (32 - (noise & 0x1F)) * 64; // TODO: correct?
+					unsigned noise_lfsr = this->noise_lfsr;
+					do
+					{
+						int new_dac = 0x1F & -(noise_lfsr >> 1 & 1); // 0x1F or 0 depending on bit 1 of the LFSR
+						// Implemented using "Galois configuration"
+						// TODO: find correct LFSR algorithm
+						noise_lfsr = (noise_lfsr >> 1) ^ (0xE008 & -(noise_lfsr & 1));
+						//noise_lfsr = (noise_lfsr >> 1) ^ (0x6000 & -(noise_lfsr & 1));
+						int delta = new_dac - dac;
+						if ( delta )
+						{
+							dac = new_dac;
+							synth_.offset( time, delta * volume_0, osc_outputs_0 );
+							if ( osc_outputs_1 )
+								synth_.offset( time, delta * volume_1, osc_outputs_1 );
+						}
+						time += period;
+					}
+					while ( time < end_time );
+					
+					this->noise_lfsr = noise_lfsr;
+					assert( noise_lfsr );
+				}
+			}
+			else if ( !(control & 0x40) )
+			{
+				// wave
+				int phase = (this->phase + 1) & 0x1F; // pre-advance for optimal inner loop
+				int period = this->period * 2;
+				if ( period >= 14 && (volume_0 | volume_1) ) // shorter periods and silent oscillators take the phase-only branch below
+				{
+					do
+					{
+						int new_dac = wave [phase];
+						phase = (phase + 1) & 0x1F;
+						int delta = new_dac - dac;
+						if ( delta )
+						{
+							dac = new_dac;
+							synth_.offset( time, delta * volume_0, osc_outputs_0 );
+							if ( osc_outputs_1 )
+								synth_.offset( time, delta * volume_1, osc_outputs_1 );
+						}
+						time += period;
+					}
+					while ( time < end_time );
+				}
+				else
+				{
+					if ( !period )
+					{
+						// TODO: Gekisha Boy assumes that period = 0 silences wave
+						//period = 0x1000 * 2;
+						period = 1; // also avoids dividing by zero below
+						//if ( !(volume_0 | volume_1) )
+						//  debug_printf( "Used period 0\n" );
+					}
+					
+					// maintain phase when silent
+					blargg_long count = (end_time - time + period - 1) / period; // periods needed to reach end_time, rounded up
+					phase += count; // phase will be masked below
+					time += count * period;
+				}
+				this->phase = (phase - 1) & 0x1F; // undo pre-advance
+			}
+		}
+		time -= end_time; // what remains becomes the delay into the next call
+		if ( time < 0 )
+			time = 0;
+		delay = time;
+		
+		this->dac = dac;
+		last_amp [0] = dac * volume_0;
+		last_amp [1] = dac * volume_1;
+	}
+	last_time = end_time;
+}
+
+void Hes_Apu::balance_changed( Hes_Osc& osc ) // recomputes osc's left/right volumes and output buffers
+{
+	static short const log_table [32] = { // ~1.5 db per step
+		#define ENTRY( factor ) short (factor * Hes_Osc::amp_range / 31.0 + 0.5)
+		ENTRY( 0.000000 ),ENTRY( 0.005524 ),ENTRY( 0.006570 ),ENTRY( 0.007813 ),
+		ENTRY( 0.009291 ),ENTRY( 0.011049 ),ENTRY( 0.013139 ),ENTRY( 0.015625 ),
+		ENTRY( 0.018581 ),ENTRY( 0.022097 ),ENTRY( 0.026278 ),ENTRY( 0.031250 ),
+		ENTRY( 0.037163 ),ENTRY( 0.044194 ),ENTRY( 0.052556 ),ENTRY( 0.062500 ),
+		ENTRY( 0.074325 ),ENTRY( 0.088388 ),ENTRY( 0.105112 ),ENTRY( 0.125000 ),
+		ENTRY( 0.148651 ),ENTRY( 0.176777 ),ENTRY( 0.210224 ),ENTRY( 0.250000 ),
+		ENTRY( 0.297302 ),ENTRY( 0.353553 ),ENTRY( 0.420448 ),ENTRY( 0.500000 ),
+		ENTRY( 0.594604 ),ENTRY( 0.707107 ),ENTRY( 0.840896 ),ENTRY( 1.000000 ),
+		#undef ENTRY
+	};
+	
+	int vol = (osc.control & 0x1F) - 0x1E * 2; // at most 31 - 60; the two balance terms below add up to 60
+	
+	int left  = vol + (osc.balance >> 3 & 0x1E) + (balance >> 3 & 0x1E); // high nibbles of channel and global balance, doubled
+	if ( left  < 0 ) left  = 0;
+	
+	int right = vol + (osc.balance << 1 & 0x1E) + (balance << 1 & 0x1E); // low nibbles of channel and global balance, doubled
+	if ( right < 0 ) right = 0;
+	
+	left  = log_table [left ]; // index is in 0..31 after the clamps above
+	right = log_table [right];
+	
+	// optimizing for the common case of being centered also allows easy
+	// panning using Effects_Buffer
+	osc.outputs [0] = osc.chans [0]; // center
+	osc.outputs [1] = 0;
+	if ( left != right )
+	{
+		osc.outputs [0] = osc.chans [1]; // left
+		osc.outputs [1] = osc.chans [2]; // right
+	}
+	
+	if ( center_waves )
+	{
+		osc.last_amp [0] += (left  - osc.volume [0]) * 16; // shifts the recorded amplitude by the volume change times 16
+		osc.last_amp [1] += (right - osc.volume [1]) * 16;
+	}
+	
+	osc.volume [0] = left;
+	osc.volume [1] = right;
+}
+
+void Hes_Apu::write_data( blip_time_t time, int addr, int data ) // applies a register write made at the given time
+{
+	if ( addr == 0x800 )
+	{
+		latch = data & 7; // selects the oscillator that writes to 0x802 and above apply to
+	}
+	else if ( addr == 0x801 )
+	{
+		if ( balance != data )
+		{
+			balance = data;
+			
+			Hes_Osc* osc = &oscs [osc_count];
+			do
+			{
+				osc--;
+				osc->run_until( synth, time ); // catch up with the old volume before changing it
+				balance_changed( *osc ); // global balance feeds every oscillator's volume, so refresh each one
+			}
+			while ( osc != oscs );
+		}
+	}
+	else if ( latch < osc_count ) // latch can hold 6 or 7, which select no oscillator
+	{
+		Hes_Osc& osc = oscs [latch];
+		osc.run_until( synth, time ); // catch up before the register change takes effect
+		switch ( addr )
+		{
+		case 0x802: // low 8 bits of period
+			osc.period = (osc.period & 0xF00) | data;
+			break;
+		
+		case 0x803: // high 4 bits of period
+			osc.period = (osc.period & 0x0FF) | ((data & 0x0F) << 8);
+			break;
+		
+		case 0x804:
+			if ( osc.control & 0x40 & ~data ) // bit 6 going from set to clear restarts the phase
+				osc.phase = 0;
+			osc.control = data;
+			balance_changed( osc );
+			break;
+		
+		case 0x805:
+			osc.balance = data;
+			balance_changed( osc );
+			break;
+		
+		case 0x806:
+			data &= 0x1F;
+			if ( !(osc.control & 0x40) )
+			{
+				osc.wave [osc.phase] = data; // write goes to the wave table at the current phase
+				osc.phase = (osc.phase + 1) & 0x1F;
+			}
+			else if ( osc.control & 0x80 )
+			{
+				osc.dac = data; // bits 6 and 7 both set: value is output directly
+			}
+			break;
+		
+		 case 0x807:
+		 	if ( &osc >= &oscs [4] ) // only the last two oscillators accept noise writes
+		 		osc.noise = data;
+		 	break;
+		 
+		 case 0x809:
+		 	if ( !(data & 0x80) && (data & 0x03) != 0 )
+		 		debug_printf( "HES LFO not supported\n" );
+		}
+	}
+}
+
+void Hes_Apu::end_frame( blip_time_t end_time ) // runs all oscillators to end_time and rebases their clocks
+{
+	Hes_Osc* osc = &oscs [osc_count];
+	do
+	{
+		osc--;
+		if ( end_time > osc->last_time )
+			osc->run_until( synth, end_time );
+		assert( osc->last_time >= end_time );
+		osc->last_time -= end_time; // times in the next frame are relative to this frame's end
+	}
+	while ( osc != oscs );
+}
diff --git a/libraries/game-music-emu/gme/Hes_Apu.h b/libraries/game-music-emu/gme/Hes_Apu.h
new file mode 100644
index 000000000..1efc0a064
--- /dev/null
+++ b/libraries/game-music-emu/gme/Hes_Apu.h
@@ -0,0 +1,66 @@
+// Turbo Grafx 16 (PC Engine) PSG sound chip emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef HES_APU_H
+#define HES_APU_H
+
+#include "blargg_common.h"
+#include "Blip_Buffer.h"
+
+struct Hes_Osc // members declared before outputs are zeroed by Hes_Apu::reset(); keep that order
+{
+	unsigned char wave [32];
+	short volume [2]; // left and right, set by Hes_Apu::balance_changed()
+	int last_amp [2];
+	int delay; // time carried over from the end of the last run_until()
+	int period;
+	unsigned char noise;
+	unsigned char phase; // index into wave
+	unsigned char balance;
+	unsigned char dac;
+	blip_time_t last_time; // time run_until() last ran to
+	
+	Blip_Buffer* outputs [2]; // buffers currently written to; second is null when centered
+	Blip_Buffer* chans [3]; // center, left, right as passed to Hes_Apu::osc_output()
+	unsigned noise_lfsr;
+	unsigned char control;
+	
+	enum { amp_range = 0x8000 };
+	typedef Blip_Synth<blip_med_quality,1> synth_t;
+	
+	void run_until( synth_t& synth, blip_time_t );
+};
+
+class Hes_Apu {
+public:
+	void treble_eq( blip_eq_t const& ); // forwarded to the shared synth
+	void volume( double );
+	
+	enum { osc_count = 6 };
+	void osc_output( int index, Blip_Buffer* center, Blip_Buffer* left, Blip_Buffer* right );
+	
+	void reset();
+	
+	enum { start_addr = 0x0800 }; // lowest address handled by write_data()
+	enum { end_addr   = 0x0809 }; // highest address handled by write_data()
+	void write_data( blip_time_t, int addr, int data );
+	
+	void end_frame( blip_time_t );
+	
+public:
+	Hes_Apu();
+private:
+	Hes_Osc oscs [osc_count];
+	int latch; // oscillator selected by a write to 0x800
+	int balance; // global balance written at 0x801
+	Hes_Osc::synth_t synth;
+	
+	void balance_changed( Hes_Osc& );
+	void recalc_chans(); // NOTE(review): no definition appears in Hes_Apu.cpp - confirm whether this is still needed
+};
+
+inline void Hes_Apu::volume( double v ) { synth.volume( 1.8 / osc_count / Hes_Osc::amp_range * v ); } // scaled per oscillator and by amp_range
+
+inline void Hes_Apu::treble_eq( blip_eq_t const& eq ) { synth.treble_eq( eq ); }
+
+#endif
diff --git a/libraries/game-music-emu/gme/Hes_Cpu.cpp b/libraries/game-music-emu/gme/Hes_Cpu.cpp
new file mode 100644
index 000000000..095a1851a
--- /dev/null
+++ b/libraries/game-music-emu/gme/Hes_Cpu.cpp
@@ -0,0 +1,1295 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Hes_Cpu.h"
+
+#include "blargg_endian.h"
+
+//#include "hes_cpu_log.h"
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+// TODO: support T flag, including clearing it at appropriate times?
+
+// all zero-page should really use whatever is at page 1, but that would
+// reduce efficiency quite a bit
+int const ram_addr = 0x2000; // added to a zero-page operand when it must go through READ()/WRITE() (see TSB/TRB zp in run())
+
+#define FLUSH_TIME()    (void) (s.time = s_time) // copy run()'s local s_time into s.time before calling out
+#define CACHE_TIME()    (void) (s_time = s.time) // reload s_time from s.time after such a call
+
+#include "hes_cpu_io.h"
+
+#include "blargg_source.h"
+
+#if BLARGG_NONPORTABLE // set_mmr() then stores code_map entries biased by the page's base address
+	#define PAGE_OFFSET( addr ) (addr) // so the full address indexes the biased pointer directly
+#else
+	#define PAGE_OFFSET( addr ) ((addr) & (page_size - 1)) // offset of addr within its page
+#endif
+
+// status flags
+int const st_n = 0x80; // negative
+int const st_v = 0x40; // overflow (tested by BVC/BVS, cleared by CLV)
+int const st_t = 0x20; // T flag; not emulated (see TODO above)
+int const st_b = 0x10; // set in the status byte pushed by PHP and BRK
+int const st_d = 0x08; // decimal mode; ADC/SBC only log that it is unsupported
+int const st_i = 0x04; // interrupt disable
+int const st_z = 0x02; // zero
+int const st_c = 0x01; // carry
+
+void Hes_Cpu::reset() // resets timing and registers; ram, mmr and code_map are not modified here
+{
+	check( state == &state_ ); // reset() is not expected while run() has state pointing at its local copy
+	state = &state_;
+
+	state_.time = 0;
+	state_.base = 0;
+	irq_time_   = future_hes_time; // end_frame() never rebases values at or above future_hes_time
+	end_time_   = future_hes_time;
+
+	r.status = st_i; // only the interrupt-disable flag is set
+	r.sp     = 0;
+	r.pc     = 0;
+	r.a      = 0;
+	r.x      = 0;
+	r.y      = 0;
+
+	blargg_verify_byte_order();
+}
+
+void Hes_Cpu::set_mmr( int reg, int bank ) // records bank in page register reg and refreshes that page's code pointer
+{
+	assert( (unsigned) reg <= page_count ); // allow page past end to be set
+	assert( (unsigned) bank < 0x100 );
+	mmr [reg] = bank;
+	uint8_t const* code = CPU_SET_MMR( this, reg, bank ); // macro defined outside this file; NOTE(review): presumably returns the memory backing the page -- confirm
+	state->code_map [reg] = code - PAGE_OFFSET( reg << page_shift ); // pre-subtract what READ_PROG()/run() add back when indexing
+}
+
+#define TIME    (s_time + s.base) // current absolute time: relative s_time plus the state's base
+
+#define READ( addr )            CPU_READ( this, (addr), TIME )
+#define WRITE( addr, data )     {CPU_WRITE( this, (addr), (data), TIME );}
+#define READ_LOW( addr )        (ram [int (addr)]) // direct ram[] access, used for zero page and the 0x100-0x1FF stack
+#define WRITE_LOW( addr, data ) (void) (READ_LOW( addr ) = (data))
+#define READ_PROG( addr )       (s.code_map [(addr) >> page_shift] [PAGE_OFFSET( addr )])
+
+#define SET_SP( v )     (sp = ((v) + 1) | 0x100) // run() keeps sp as a ram[] index, one above the register value
+#define GET_SP()        ((sp - 1) & 0xFF) // inverse of SET_SP: back to the 8-bit register value
+#define PUSH( v )       ((sp = (sp - 1) | 0x100), WRITE_LOW( sp, v )) // decrement within the stack page, then store
+
+bool Hes_Cpu::run( hes_time_t end_time ) // runs instructions until out of time; returns true if an illegal or unsupported opcode was hit
+{
+	bool illegal_encountered = false;
+	set_end_time( end_time );
+	state_t s = this->state_; // work on a local copy; written back to state_ before returning
+	this->state = &s; // members that go through state (set_mmr(), time(), ...) now see the local copy
+	// even on x86, using s.time in place of s_time was slower
+	blargg_long s_time = s.time;
+
+	// registers
+	uint_fast16_t pc = r.pc;
+	uint_fast8_t a = r.a;
+	uint_fast8_t x = r.x;
+	uint_fast8_t y = r.y;
+	uint_fast16_t sp; // biased form of r.sp, see SET_SP()/GET_SP()
+	SET_SP( r.sp );
+
+	#define IS_NEG (nz & 0x8080)
+
+	#define CALC_STATUS( out ) do {\
+		out = status & (st_v | st_d | st_i);\
+		out |= ((nz >> 8) | nz) & st_n;\
+		out |= c >> 8 & st_c;\
+		if ( !(nz & 0xFF) ) out |= st_z;\
+	} while ( 0 )
+
+	#define SET_STATUS( in ) do {\
+		status = in & (st_v | st_d | st_i);\
+		nz = in << 8;\
+		c = nz;\
+		nz |= ~in & st_z;\
+	} while ( 0 )
+
+	uint_fast8_t status;
+	uint_fast16_t c;  // carry set if (c & 0x100) != 0
+	uint_fast16_t nz; // Z set if (nz & 0xFF) == 0, N set if (nz & 0x8080) != 0
+	{
+		uint_fast8_t temp = r.status;
+		SET_STATUS( temp );
+	}
+
+	goto loop;
+branch_not_taken:
+	s_time -= 2; // clock_table holds the taken-branch cost; a branch not taken is 2 clocks cheaper
+loop:
+
+	#ifndef NDEBUG
+	{
+		hes_time_t correct = end_time_;
+		if ( !(status & st_i) && correct > irq_time_ )
+			correct = irq_time_;
+		check( s.base == correct );
+		/*
+		static long count;
+		if ( count == 1844 ) Debugger();
+		if ( s.base != correct ) debug_printf( "%ld\n", count );
+		count++;
+		*/
+	}
+	#endif
+
+	check( (unsigned) GET_SP() < 0x100 );
+	check( (unsigned) a < 0x100 );
+	check( (unsigned) x < 0x100 );
+
+	uint8_t const* instr = s.code_map [pc >> page_shift];
+	uint_fast8_t opcode;
+
+	// TODO: eliminate this special case
+	#if BLARGG_NONPORTABLE
+		opcode = instr [pc];
+		pc++;
+		instr += pc;
+	#else
+		instr += PAGE_OFFSET( pc );
+		opcode = *instr++;
+		pc++;
+	#endif
+
+	// TODO: each reference lists slightly different timing values, ugh
+	static uint8_t const clock_table [256] =
+	{// 0 1 2  3 4 5 6 7 8 9 A B C D E F
+		1,7,3, 4,6,4,6,7,3,2,2,2,7,5,7,6,// 0
+		4,7,7, 4,6,4,6,7,2,5,2,2,7,5,7,6,// 1
+		7,7,3, 4,4,4,6,7,4,2,2,2,5,5,7,6,// 2
+		4,7,7, 2,4,4,6,7,2,5,2,2,5,5,7,6,// 3
+		7,7,3, 4,8,4,6,7,3,2,2,2,4,5,7,6,// 4
+		4,7,7, 5,2,4,6,7,2,5,3,2,2,5,7,6,// 5
+		7,7,2, 2,4,4,6,7,4,2,2,2,7,5,7,6,// 6
+		4,7,7,17,4,4,6,7,2,5,4,2,7,5,7,6,// 7
+		4,7,2, 7,4,4,4,7,2,2,2,2,5,5,5,6,// 8
+		4,7,7, 8,4,4,4,7,2,5,2,2,5,5,5,6,// 9
+		2,7,2, 7,4,4,4,7,2,2,2,2,5,5,5,6,// A
+		4,7,7, 8,4,4,4,7,2,5,2,2,5,5,5,6,// B
+		2,7,2,17,4,4,6,7,2,2,2,2,5,5,7,6,// C
+		4,7,7,17,2,4,6,7,2,5,3,2,2,5,7,6,// D
+		2,7,2,17,4,4,6,7,2,2,2,2,5,5,7,6,// E
+		4,7,7,17,2,4,6,7,2,5,4,2,2,5,7,6 // F
+	}; // 0x00 was 8
+
+	uint_fast16_t data;
+	data = clock_table [opcode];
+	if ( (s_time += data) >= 0 ) // s_time stays negative while time remains before s.base
+		goto possibly_out_of_time;
+almost_out_of_time:
+
+	data = *instr; // first operand byte (instr already points past the opcode)
+
+	#ifdef HES_CPU_LOG_H
+		log_cpu( "new", pc - 1, opcode, instr [0], instr [1], instr [2],
+				instr [3], instr [4], instr [5] );
+		//log_opcode( opcode );
+	#endif
+
+	switch ( opcode )
+	{
+possibly_out_of_time: // reached only through the goto above, never through a case value
+		if ( s_time < (int) data ) // s_time was still negative before the charge, so the instruction may run
+			goto almost_out_of_time;
+		s_time -= data; // undo the clock charge; this instruction is not executed now
+		goto out_of_time;
+
+// Macros
+
+#define GET_MSB()           (instr [1])
+#define ADD_PAGE( out )     (pc++, out = data + 0x100 * GET_MSB());
+#define GET_ADDR()          GET_LE16( instr )
+
+// TODO: is the penalty really always added? the original 6502 was much better
+//#define PAGE_CROSS_PENALTY( lsb ) (void) (s_time += (lsb) >> 8)
+#define PAGE_CROSS_PENALTY( lsb )
+
+// Branch
+
+// TODO: more efficient way to handle negative branch that wraps PC around
+#define BRANCH( cond )\
+{\
+	int_fast16_t offset = (int8_t) data;\
+	pc++;\
+	if ( !(cond) ) goto branch_not_taken;\
+	pc = uint16_t (pc + offset);\
+	goto loop;\
+}
+
+	case 0xF0: // BEQ
+		BRANCH( !((uint8_t) nz) );
+
+	case 0xD0: // BNE
+		BRANCH( (uint8_t) nz );
+
+	case 0x10: // BPL
+		BRANCH( !IS_NEG );
+
+	case 0x90: // BCC
+		BRANCH( !(c & 0x100) )
+
+	case 0x30: // BMI
+		BRANCH( IS_NEG )
+
+	case 0x50: // BVC
+		BRANCH( !(status & st_v) )
+
+	case 0x70: // BVS
+		BRANCH( status & st_v )
+
+	case 0xB0: // BCS
+		BRANCH( c & 0x100 )
+
+	case 0x80: // BRA
+	branch_taken:
+		BRANCH( true );
+
+	case 0xFF: // idle trap when fetched at idle_addr (pc is already one past it); otherwise falls through to the shared BBRn/BBSn code
+		if ( pc == idle_addr + 1 )
+			goto idle_done;
+	case 0x0F: // BBRn
+	case 0x1F:
+	case 0x2F:
+	case 0x3F:
+	case 0x4F:
+	case 0x5F:
+	case 0x6F:
+	case 0x7F:
+	case 0x8F: // BBSn
+	case 0x9F:
+	case 0xAF:
+	case 0xBF:
+	case 0xCF:
+	case 0xDF:
+	case 0xEF: {
+		uint_fast16_t t = 0x101 * READ_LOW( data ); // copy of the byte in bits 0-7 and in bits 8-15
+		t ^= 0xFF; // low copy inverted: opcodes 0x0F-0x7F test bits 0-7 (bit reset), 0x8F-0xFF test bits 8-15 (bit set)
+		pc++;
+		data = GET_MSB();
+		BRANCH( t & (1 << (opcode >> 4)) )
+	}
+
+	case 0x4C: // JMP abs
+		pc = GET_ADDR();
+		goto loop;
+
+	case 0x7C: // JMP (ind+X)
+		data += x;
+	case 0x6C:{// JMP (ind)
+		data += 0x100 * GET_MSB();
+		pc = GET_LE16( &READ_PROG( data ) );
+		goto loop;
+	}
+
+// Subroutine
+
+	case 0x44: // BSR
+		WRITE_LOW( 0x100 | (sp - 1), pc >> 8 );
+		sp = (sp - 2) | 0x100;
+		WRITE_LOW( sp, pc );
+		goto branch_taken;
+
+	case 0x20: { // JSR
+		uint_fast16_t temp = pc + 1;
+		pc = GET_ADDR();
+		WRITE_LOW( 0x100 | (sp - 1), temp >> 8 );
+		sp = (sp - 2) | 0x100;
+		WRITE_LOW( sp, temp );
+		goto loop;
+	}
+
+	case 0x60: // RTS
+		pc = 0x100 * READ_LOW( 0x100 | (sp - 0xFF) );
+		pc += 1 + READ_LOW( sp );
+		sp = (sp - 0xFE) | 0x100; // same as adding 2 to the low byte while staying inside the stack page
+		goto loop;
+
+	case 0x00: // BRK
+		goto handle_brk;
+
+// Common
+
+	case 0xBD:{// LDA abs,X
+		PAGE_CROSS_PENALTY( data + x );
+		uint_fast16_t addr = GET_ADDR() + x;
+		pc += 2;
+		CPU_READ_FAST( this, addr, TIME, nz );
+		a = nz;
+		goto loop;
+	}
+
+	case 0x9D:{// STA abs,X
+		uint_fast16_t addr = GET_ADDR() + x;
+		pc += 2;
+		CPU_WRITE_FAST( this, addr, a, TIME );
+		goto loop;
+	}
+
+	case 0x95: // STA zp,x
+		data = uint8_t (data + x);
+	case 0x85: // STA zp
+		pc++;
+		WRITE_LOW( data, a );
+		goto loop;
+
+	case 0xAE:{// LDX abs
+		uint_fast16_t addr = GET_ADDR();
+		pc += 2;
+		CPU_READ_FAST( this, addr, TIME, nz );
+		x = nz;
+		goto loop;
+	}
+
+	case 0xA5: // LDA zp
+		a = nz = READ_LOW( data );
+		pc++;
+		goto loop;
+
+// Load/store
+
+	{
+		uint_fast16_t addr;
+	case 0x91: // STA (ind),Y
+		addr = 0x100 * READ_LOW( uint8_t (data + 1) );
+		addr += READ_LOW( data ) + y;
+		pc++;
+		goto sta_ptr;
+
+	case 0x81: // STA (ind,X)
+		data = uint8_t (data + x);
+	case 0x92: // STA (ind)
+		addr = 0x100 * READ_LOW( uint8_t (data + 1) );
+		addr += READ_LOW( data );
+		pc++;
+		goto sta_ptr;
+
+	case 0x99: // STA abs,Y
+		data += y;
+	case 0x8D: // STA abs
+		addr = data + 0x100 * GET_MSB();
+		pc += 2;
+	sta_ptr:
+		CPU_WRITE_FAST( this, addr, a, TIME );
+		goto loop;
+	}
+
+	{
+		uint_fast16_t addr;
+	case 0xA1: // LDA (ind,X)
+		data = uint8_t (data + x);
+	case 0xB2: // LDA (ind)
+		addr = 0x100 * READ_LOW( uint8_t (data + 1) );
+		addr += READ_LOW( data );
+		pc++;
+		goto a_nz_read_addr;
+
+	case 0xB1:// LDA (ind),Y
+		addr = READ_LOW( data ) + y;
+		PAGE_CROSS_PENALTY( addr );
+		addr += 0x100 * READ_LOW( (uint8_t) (data + 1) );
+		pc++;
+		goto a_nz_read_addr;
+
+	case 0xB9: // LDA abs,Y
+		data += y;
+		PAGE_CROSS_PENALTY( data );
+	case 0xAD: // LDA abs
+		addr = data + 0x100 * GET_MSB();
+		pc += 2;
+	a_nz_read_addr:
+		CPU_READ_FAST( this, addr, TIME, nz );
+		a = nz;
+		goto loop;
+	}
+
+	case 0xBE:{// LDX abs,y
+		PAGE_CROSS_PENALTY( data + y );
+		uint_fast16_t addr = GET_ADDR() + y;
+		pc += 2;
+		FLUSH_TIME();
+		x = nz = READ( addr );
+		CACHE_TIME();
+		goto loop;
+	}
+
+	case 0xB5: // LDA zp,x
+		a = nz = READ_LOW( uint8_t (data + x) );
+		pc++;
+		goto loop;
+
+	case 0xA9: // LDA #imm
+		pc++;
+		a  = data;
+		nz = data;
+		goto loop;
+
+// Bit operations
+
+	case 0x3C: // BIT abs,x
+		data += x;
+	case 0x2C:{// BIT abs
+		uint_fast16_t addr;
+		ADD_PAGE( addr );
+		FLUSH_TIME();
+		nz = READ( addr );
+		CACHE_TIME();
+		goto bit_common;
+	}
+	case 0x34: // BIT zp,x
+		data = uint8_t (data + x);
+	case 0x24: // BIT zp
+		data = READ_LOW( data );
+	case 0x89: // BIT imm
+		nz = data;
+	bit_common:
+		pc++;
+		status &= ~st_v;
+		status |= nz & st_v;
+		if ( nz & a )
+			goto loop; // Z should be clear, and nz must be non-zero if nz & a is
+		nz <<= 8; // set Z flag without affecting N flag
+		goto loop;
+
+	{
+		uint_fast16_t addr;
+
+	case 0xB3: // TST abs,x
+		addr = GET_MSB() + x;
+		goto tst_abs;
+
+	case 0x93: // TST abs
+		addr = GET_MSB();
+	tst_abs:
+		addr += 0x100 * instr [2];
+		pc++;
+		FLUSH_TIME();
+		nz = READ( addr );
+		CACHE_TIME();
+		goto tst_common;
+	}
+
+	case 0xA3: // TST zp,x
+		nz = READ_LOW( uint8_t (GET_MSB() + x) );
+		goto tst_common;
+
+	case 0x83: // TST zp
+		nz = READ_LOW( GET_MSB() );
+	tst_common:
+		pc += 2;
+		status &= ~st_v;
+		status |= nz & st_v;
+		if ( nz & data )
+			goto loop; // Z should be clear, and nz must be non-zero if nz & data is
+		nz <<= 8; // set Z flag without affecting N flag
+		goto loop;
+
+	{
+		uint_fast16_t addr;
+	case 0x0C: // TSB abs
+	case 0x1C: // TRB abs
+		addr = GET_ADDR();
+		pc++;
+		goto txb_addr;
+
+	// TODO: everyone lists different behaviors for the status flags, ugh
+	case 0x04: // TSB zp
+	case 0x14: // TRB zp
+		addr = data + ram_addr; // zero-page operand rebased by ram_addr so the shared READ()/WRITE() path below can be used
+	txb_addr:
+		FLUSH_TIME();
+		nz = a | READ( addr );
+		if ( opcode & 0x10 )
+			nz ^= a; // bits from a will already be set, so this clears them
+		status &= ~st_v;
+		status |= nz & st_v;
+		pc++;
+		WRITE( addr, nz );
+		CACHE_TIME();
+		goto loop;
+	}
+
+	case 0x07: // RMBn
+	case 0x17:
+	case 0x27:
+	case 0x37:
+	case 0x47:
+	case 0x57:
+	case 0x67:
+	case 0x77:
+		pc++;
+		READ_LOW( data ) &= ~(1 << (opcode >> 4));
+		goto loop;
+
+	case 0x87: // SMBn
+	case 0x97:
+	case 0xA7:
+	case 0xB7:
+	case 0xC7:
+	case 0xD7:
+	case 0xE7:
+	case 0xF7:
+		pc++;
+		READ_LOW( data ) |= 1 << ((opcode >> 4) - 8);
+		goto loop;
+
+// Load/store
+
+	case 0x9E: // STZ abs,x
+		data += x;
+	case 0x9C: // STZ abs
+		ADD_PAGE( data );
+		pc++;
+		FLUSH_TIME();
+		WRITE( data, 0 );
+		CACHE_TIME();
+		goto loop;
+
+	case 0x74: // STZ zp,x
+		data = uint8_t (data + x);
+	case 0x64: // STZ zp
+		pc++;
+		WRITE_LOW( data, 0 );
+		goto loop;
+
+	case 0x94: // STY zp,x
+		data = uint8_t (data + x);
+	case 0x84: // STY zp
+		pc++;
+		WRITE_LOW( data, y );
+		goto loop;
+
+	case 0x96: // STX zp,y
+		data = uint8_t (data + y);
+	case 0x86: // STX zp
+		pc++;
+		WRITE_LOW( data, x );
+		goto loop;
+
+	case 0xB6: // LDX zp,y
+		data = uint8_t (data + y);
+	case 0xA6: // LDX zp
+		data = READ_LOW( data );
+	case 0xA2: // LDX #imm
+		pc++;
+		x = data;
+		nz = data;
+		goto loop;
+
+	case 0xB4: // LDY zp,x
+		data = uint8_t (data + x);
+	case 0xA4: // LDY zp
+		data = READ_LOW( data );
+	case 0xA0: // LDY #imm
+		pc++;
+		y = data;
+		nz = data;
+		goto loop;
+
+	case 0xBC: // LDY abs,X
+		data += x;
+		PAGE_CROSS_PENALTY( data );
+	case 0xAC:{// LDY abs
+		uint_fast16_t addr = data + 0x100 * GET_MSB();
+		pc += 2;
+		FLUSH_TIME();
+		y = nz = READ( addr );
+		CACHE_TIME();
+		goto loop;
+	}
+
+	{
+		uint_fast8_t temp;
+	case 0x8C: // STY abs
+		temp = y;
+		goto store_abs;
+
+	case 0x8E: // STX abs
+		temp = x;
+	store_abs:
+		uint_fast16_t addr = GET_ADDR();
+		pc += 2;
+		FLUSH_TIME();
+		WRITE( addr, temp );
+		CACHE_TIME();
+		goto loop;
+	}
+
+// Compare
+
+	case 0xEC:{// CPX abs
+		uint_fast16_t addr = GET_ADDR();
+		pc++;
+		FLUSH_TIME();
+		data = READ( addr );
+		CACHE_TIME();
+		goto cpx_data;
+	}
+
+	case 0xE4: // CPX zp
+		data = READ_LOW( data );
+	case 0xE0: // CPX #imm
+	cpx_data:
+		nz = x - data;
+		pc++;
+		c = ~nz;
+		nz &= 0xFF;
+		goto loop;
+
+	case 0xCC:{// CPY abs
+		uint_fast16_t addr = GET_ADDR();
+		pc++;
+		FLUSH_TIME();
+		data = READ( addr );
+		CACHE_TIME();
+		goto cpy_data;
+	}
+
+	case 0xC4: // CPY zp
+		data = READ_LOW( data );
+	case 0xC0: // CPY #imm
+	cpy_data:
+		nz = y - data;
+		pc++;
+		c = ~nz;
+		nz &= 0xFF;
+		goto loop;
+
+// Logical
+
+#define ARITH_ADDR_MODES( op )\
+	case op - 0x04: /* (ind,x) */\
+		data = uint8_t (data + x);\
+	case op + 0x0D: /* (ind) */\
+		data = 0x100 * READ_LOW( uint8_t (data + 1) ) + READ_LOW( data );\
+		goto ptr##op;\
+	case op + 0x0C:{/* (ind),y */\
+		uint_fast16_t temp = READ_LOW( data ) + y;\
+		PAGE_CROSS_PENALTY( temp );\
+		data = temp + 0x100 * READ_LOW( uint8_t (data + 1) );\
+		goto ptr##op;\
+	}\
+	case op + 0x10: /* zp,X */\
+		data = uint8_t (data + x);\
+	case op + 0x00: /* zp */\
+		data = READ_LOW( data );\
+		goto imm##op;\
+	case op + 0x14: /* abs,Y */\
+		data += y;\
+		goto ind##op;\
+	case op + 0x18: /* abs,X */\
+		data += x;\
+	ind##op:\
+		PAGE_CROSS_PENALTY( data );\
+	case op + 0x08: /* abs */\
+		ADD_PAGE( data );\
+	ptr##op:\
+		FLUSH_TIME();\
+		data = READ( data );\
+		CACHE_TIME();\
+	case op + 0x04: /* imm */\
+	imm##op:
+
+	ARITH_ADDR_MODES( 0xC5 ) // CMP
+		nz = a - data;
+		pc++;
+		c = ~nz;
+		nz &= 0xFF;
+		goto loop;
+
+	ARITH_ADDR_MODES( 0x25 ) // AND
+		nz = (a &= data);
+		pc++;
+		goto loop;
+
+	ARITH_ADDR_MODES( 0x45 ) // EOR
+		nz = (a ^= data);
+		pc++;
+		goto loop;
+
+	ARITH_ADDR_MODES( 0x05 ) // ORA
+		nz = (a |= data);
+		pc++;
+		goto loop;
+
+// Add/subtract
+
+	ARITH_ADDR_MODES( 0xE5 ) // SBC
+		data ^= 0xFF; // SBC reuses the ADC code with the operand's low byte inverted
+		goto adc_imm;
+
+	ARITH_ADDR_MODES( 0x65 ) // ADC
+	adc_imm: {
+		if ( status & st_d )
+			debug_printf( "Decimal mode not supported\n" );
+		int_fast16_t carry = c >> 8 & 1;
+		int_fast16_t ov = (a ^ 0x80) + carry + (int8_t) data; // sign-extend
+		status &= ~st_v;
+		status |= ov >> 2 & 0x40;
+		c = nz = a + data + carry;
+		pc++;
+		a = (uint8_t) nz;
+		goto loop;
+	}
+
+// Shift/rotate
+
+	case 0x4A: // LSR A
+		c = 0;
+	case 0x6A: // ROR A
+		nz = c >> 1 & 0x80;
+		c = a << 8;
+		nz |= a >> 1;
+		a = nz;
+		goto loop;
+
+	case 0x0A: // ASL A
+		nz = a << 1;
+		c = nz;
+		a = (uint8_t) nz;
+		goto loop;
+
+	case 0x2A: { // ROL A
+		nz = a << 1;
+		int_fast16_t temp = c >> 8 & 1;
+		c = nz;
+		nz |= temp;
+		a = (uint8_t) nz;
+		goto loop;
+	}
+
+	case 0x5E: // LSR abs,X
+		data += x;
+	case 0x4E: // LSR abs
+		c = 0;
+	case 0x6E: // ROR abs
+	ror_abs: {
+		ADD_PAGE( data );
+		FLUSH_TIME();
+		int temp = READ( data );
+		nz = (c >> 1 & 0x80) | (temp >> 1);
+		c = temp << 8;
+		goto rotate_common;
+	}
+
+	case 0x3E: // ROL abs,X
+		data += x;
+		goto rol_abs;
+
+	case 0x1E: // ASL abs,X
+		data += x;
+	case 0x0E: // ASL abs
+		c = 0;
+	case 0x2E: // ROL abs
+	rol_abs:
+		ADD_PAGE( data );
+		nz = c >> 8 & 1;
+		FLUSH_TIME();
+		nz |= (c = READ( data ) << 1);
+	rotate_common:
+		pc++;
+		WRITE( data, (uint8_t) nz );
+		CACHE_TIME();
+		goto loop;
+
+	case 0x7E: // ROR abs,X
+		data += x;
+		goto ror_abs;
+
+	case 0x76: // ROR zp,x
+		data = uint8_t (data + x);
+		goto ror_zp;
+
+	case 0x56: // LSR zp,x
+		data = uint8_t (data + x);
+	case 0x46: // LSR zp
+		c = 0;
+	case 0x66: // ROR zp
+	ror_zp: {
+		int temp = READ_LOW( data );
+		nz = (c >> 1 & 0x80) | (temp >> 1);
+		c = temp << 8;
+		goto write_nz_zp;
+	}
+
+	case 0x36: // ROL zp,x
+		data = uint8_t (data + x);
+		goto rol_zp;
+
+	case 0x16: // ASL zp,x
+		data = uint8_t (data + x);
+	case 0x06: // ASL zp
+		c = 0;
+	case 0x26: // ROL zp
+	rol_zp:
+		nz = c >> 8 & 1;
+		nz |= (c = READ_LOW( data ) << 1);
+		goto write_nz_zp;
+
+// Increment/decrement
+
+#define INC_DEC_AXY( reg, n ) reg = uint8_t (nz = reg + n); goto loop;
+
+	case 0x1A: // INA
+		INC_DEC_AXY( a, +1 )
+
+	case 0xE8: // INX
+		INC_DEC_AXY( x, +1 )
+
+	case 0xC8: // INY
+		INC_DEC_AXY( y, +1 )
+
+	case 0x3A: // DEA
+		INC_DEC_AXY( a, -1 )
+
+	case 0xCA: // DEX
+		INC_DEC_AXY( x, -1 )
+
+	case 0x88: // DEY
+		INC_DEC_AXY( y, -1 )
+
+	case 0xF6: // INC zp,x
+		data = uint8_t (data + x);
+	case 0xE6: // INC zp
+		nz = 1;
+		goto add_nz_zp;
+
+	case 0xD6: // DEC zp,x
+		data = uint8_t (data + x);
+	case 0xC6: // DEC zp
+		nz = (unsigned) -1;
+	add_nz_zp:
+		nz += READ_LOW( data );
+	write_nz_zp:
+		pc++;
+		WRITE_LOW( data, nz );
+		goto loop;
+
+	case 0xFE: // INC abs,x
+		data = x + GET_ADDR();
+		goto inc_ptr;
+
+	case 0xEE: // INC abs
+		data = GET_ADDR();
+	inc_ptr:
+		nz = 1;
+		goto inc_common;
+
+	case 0xDE: // DEC abs,x
+		data = x + GET_ADDR();
+		goto dec_ptr;
+
+	case 0xCE: // DEC abs
+		data = GET_ADDR();
+	dec_ptr:
+		nz = (unsigned) -1;
+	inc_common:
+		FLUSH_TIME();
+		nz += READ( data );
+		pc += 2;
+		WRITE( data, (uint8_t) nz );
+		CACHE_TIME();
+		goto loop;
+
+// Transfer
+
+	case 0xA8: // TAY
+		y  = a;
+		nz = a;
+		goto loop;
+
+	case 0x98: // TYA
+		a  = y;
+		nz = y;
+		goto loop;
+
+	case 0xAA: // TAX
+		x  = a;
+		nz = a;
+		goto loop;
+
+	case 0x8A: // TXA
+		a  = x;
+		nz = x;
+		goto loop;
+
+	case 0x9A: // TXS
+		SET_SP( x ); // verified (no flag change)
+		goto loop;
+
+	case 0xBA: // TSX
+		x = nz = GET_SP();
+		goto loop;
+
+	#define SWAP_REGS( r1, r2 ) {\
+		uint_fast8_t t = r1;\
+		r1 = r2;\
+		r2 = t;\
+		goto loop;\
+	}
+
+	case 0x02: // SXY
+		SWAP_REGS( x, y );
+
+	case 0x22: // SAX
+		SWAP_REGS( a, x );
+
+	case 0x42: // SAY
+		SWAP_REGS( a, y );
+
+	case 0x62: // CLA
+		a = 0;
+		goto loop;
+
+	case 0x82: // CLX
+		x = 0;
+		goto loop;
+
+	case 0xC2: // CLY
+		y = 0;
+		goto loop;
+
+// Stack
+
+	case 0x48: // PHA
+		PUSH( a );
+		goto loop;
+
+	case 0xDA: // PHX
+		PUSH( x );
+		goto loop;
+
+	case 0x5A: // PHY
+		PUSH( y );
+		goto loop;
+
+	case 0x40:{// RTI
+		uint_fast8_t temp = READ_LOW( sp );
+		pc  = READ_LOW( 0x100 | (sp - 0xFF) );
+		pc |= READ_LOW( 0x100 | (sp - 0xFE) ) * 0x100;
+		sp = (sp - 0xFD) | 0x100;
+		data = status;
+		SET_STATUS( temp );
+		this->r.status = status; // update externally-visible I flag
+		if ( (data ^ status) & st_i )
+		{
+			hes_time_t new_time = end_time_;
+			if ( !(status & st_i) && new_time > irq_time_ )
+				new_time = irq_time_;
+			blargg_long delta = s.base - new_time;
+			s.base = new_time;
+			s_time += delta;
+		}
+		goto loop;
+	}
+
+	#define POP()  READ_LOW( sp ); sp = (sp - 0xFF) | 0x100 // expands to two statements; only usable at the end of a full statement
+
+	case 0x68: // PLA
+		a = nz = POP();
+		goto loop;
+
+	case 0xFA: // PLX
+		x = nz = POP();
+		goto loop;
+
+	case 0x7A: // PLY
+		y = nz = POP();
+		goto loop;
+
+	case 0x28:{// PLP
+		uint_fast8_t temp = POP();
+		uint_fast8_t changed = status ^ temp;
+		SET_STATUS( temp );
+		if ( !(changed & st_i) )
+			goto loop; // I flag didn't change
+		if ( status & st_i )
+			goto handle_sei;
+		goto handle_cli;
+	}
+	#undef POP
+
+	case 0x08: { // PHP
+		uint_fast8_t temp;
+		CALC_STATUS( temp );
+		PUSH( temp | st_b );
+		goto loop;
+	}
+
+// Flags
+
+	case 0x38: // SEC
+		c = (unsigned) ~0;
+		goto loop;
+
+	case 0x18: // CLC
+		c = 0;
+		goto loop;
+
+	case 0xB8: // CLV
+		status &= ~st_v;
+		goto loop;
+
+	case 0xD8: // CLD
+		status &= ~st_d;
+		goto loop;
+
+	case 0xF8: // SED
+		status |= st_d;
+		goto loop;
+
+	case 0x58: // CLI
+		if ( !(status & st_i) )
+			goto loop;
+		status &= ~st_i;
+	handle_cli: {
+		this->r.status = status; // update externally-visible I flag
+		blargg_long delta = s.base - irq_time_;
+		if ( delta <= 0 )
+		{
+			if ( TIME < irq_time_ )
+				goto loop;
+			goto delayed_cli;
+		}
+		s.base = irq_time_;
+		s_time += delta;
+		if ( s_time < 0 )
+			goto loop;
+
+		if ( delta >= s_time + 1 )
+		{
+			// delayed irq until after next instruction
+			s.base += s_time + 1;
+			s_time = -1;
+			irq_time_ = s.base; // TODO: remove, as only to satisfy debug check in loop
+			goto loop;
+		}
+	delayed_cli:
+		debug_printf( "Delayed CLI not supported\n" ); // TODO: implement
+		goto loop;
+	}
+
+	case 0x78: // SEI
+		if ( status & st_i )
+			goto loop;
+		status |= st_i;
+	handle_sei: {
+		this->r.status = status; // update externally-visible I flag
+		blargg_long delta = s.base - end_time_;
+		s.base = end_time_;
+		s_time += delta;
+		if ( s_time < 0 )
+			goto loop;
+		debug_printf( "Delayed SEI not supported\n" ); // TODO: implement
+		goto loop;
+	}
+
+// Special
+
+	case 0x53:{// TAM
+		uint_fast8_t const bits = data; // avoid using data across function call
+		pc++;
+		for ( int i = 0; i < 8; i++ )
+			if ( bits & (1 << i) )
+				set_mmr( i, a );
+		goto loop;
+	}
+
+	case 0x43:{// TMA
+		pc++;
+		byte const* in = mmr;
+		do
+		{
+			if ( data & 1 )
+				a = *in;
+			in++;
+		}
+		while ( (data >>= 1) != 0 );
+		goto loop;
+	}
+
+	case 0x03: // ST0
+	case 0x13: // ST1
+	case 0x23:{// ST2
+		uint_fast16_t addr = opcode >> 4; // with the adjustment below: 0, 2, 3 for ST0, ST1, ST2
+		if ( addr )
+			addr++;
+		pc++;
+		FLUSH_TIME();
+		CPU_WRITE_VDP( this, addr, data, TIME );
+		CACHE_TIME();
+		goto loop;
+	}
+
+	case 0xEA: // NOP
+		goto loop;
+
+	case 0x54: // CSL
+		debug_printf( "CSL not supported\n" );
+		illegal_encountered = true;
+		goto loop;
+
+	case 0xD4: // CSH
+		goto loop;
+
+	case 0xF4: { // SET
+		//fuint16 operand = GET_MSB();
+		debug_printf( "SET not handled\n" );
+		//switch ( data )
+		//{
+		//}
+		illegal_encountered = true;
+		goto loop;
+	}
+
+// Block transfer
+
+	{
+		uint_fast16_t in_alt;
+		int_fast16_t in_inc;
+		uint_fast16_t out_alt;
+		int_fast16_t out_inc;
+
+	case 0xE3: // TIA
+		in_alt  = 0;
+		goto bxfer_alt;
+
+	case 0xF3: // TAI
+		in_alt  = 1;
+	bxfer_alt:
+		in_inc  = in_alt ^ 1;
+		out_alt = in_inc;
+		out_inc = in_alt;
+		goto bxfer;
+
+	case 0xD3: // TIN
+		in_inc  = 1;
+		out_inc = 0;
+		goto bxfer_no_alt;
+
+	case 0xC3: // TDD
+		in_inc  = -1;
+		out_inc = -1;
+		goto bxfer_no_alt;
+
+	case 0x73: // TII
+		in_inc  = 1;
+		out_inc = 1;
+	bxfer_no_alt:
+		in_alt  = 0;
+		out_alt = 0;
+	bxfer:
+		uint_fast16_t in    = GET_LE16( instr + 0 );
+		uint_fast16_t out   = GET_LE16( instr + 2 );
+		int     count = GET_LE16( instr + 4 );
+		if ( !count )
+			count = 0x10000; // a zero length field is treated as 0x10000 bytes
+		pc += 6;
+		WRITE_LOW( 0x100 | (sp - 1), y );
+		WRITE_LOW( 0x100 | (sp - 2), a );
+		WRITE_LOW( 0x100 | (sp - 3), x );
+		FLUSH_TIME();
+		do
+		{
+			// TODO: reads from $0800-$1400 in I/O page return 0 and don't access I/O
+			uint_fast8_t t = READ( in );
+			in += in_inc;
+			in &= 0xFFFF;
+			s.time += 6; // 6 clocks per byte, charged on s.time because s_time is flushed during the loop
+			if ( in_alt )
+				in_inc = -in_inc;
+			WRITE( out, t );
+			out += out_inc;
+			out &= 0xFFFF;
+			if ( out_alt )
+				out_inc = -out_inc;
+		}
+		while ( --count );
+		CACHE_TIME();
+		goto loop;
+	}
+
+// Illegal
+
+	default:
+		debug_printf( "Illegal opcode $%02X at $%04X\n", (int) opcode, (int) pc - 1 );
+		illegal_encountered = true;
+		goto loop;
+	}
+	assert( false ); // every case above leaves the switch with a goto
+
+	int result_;
+handle_brk:
+	pc++;
+	result_ = 6; // byte offset of the vector from 0xFFF0; the value 6 also marks BRK for the st_b test below
+
+interrupt:
+	{
+		s_time += 7;
+
+		WRITE_LOW( 0x100 | (sp - 1), pc >> 8 );
+		WRITE_LOW( 0x100 | (sp - 2), pc );
+		pc = GET_LE16( &READ_PROG( 0xFFF0 ) + result_ ); // fetch the handler address from the vector table
+
+		sp = (sp - 3) | 0x100;
+		uint_fast8_t temp;
+		CALC_STATUS( temp );
+		if ( result_ == 6 )
+			temp |= st_b;
+		WRITE_LOW( sp, temp );
+
+		status &= ~st_d;
+		status |= st_i;
+		this->r.status = status; // update externally-visible I flag
+
+		blargg_long delta = s.base - end_time_;
+		s.base = end_time_;
+		s_time += delta;
+		goto loop;
+	}
+
+idle_done:
+	s_time = 0;
+out_of_time:
+	pc--; // step back onto the opcode that was fetched but not executed
+	FLUSH_TIME();
+	CPU_DONE( this, TIME, result_ ); // macro defined outside this file; it sets result_, and a value > 0 selects an interrupt vector
+	CACHE_TIME();
+	if ( result_ > 0 )
+		goto interrupt;
+	if ( s_time < 0 )
+		goto loop;
+
+	s.time = s_time;
+
+	r.pc = pc;
+	r.sp = GET_SP();
+	r.a = a;
+	r.x = x;
+	r.y = y;
+
+	{
+		uint_fast8_t temp;
+		CALC_STATUS( temp );
+		r.status = temp;
+	}
+
+	this->state_ = s; // publish the local state copy
+	this->state = &this->state_; // and point state back at the member
+
+	return illegal_encountered;
+}
diff --git a/libraries/game-music-emu/gme/Hes_Cpu.h b/libraries/game-music-emu/gme/Hes_Cpu.h
new file mode 100644
index 000000000..cec46fa9e
--- /dev/null
+++ b/libraries/game-music-emu/gme/Hes_Cpu.h
@@ -0,0 +1,122 @@
+// PC Engine CPU emulator for use with HES music files
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef HES_CPU_H
+#define HES_CPU_H
+
+#include "blargg_common.h"
+
+typedef blargg_long hes_time_t; // clock cycle count
+typedef unsigned hes_addr_t; // 16-bit address
+enum { future_hes_time = INT_MAX / 2 + 1 }; // reset() value for irq/end time; end_frame() does not rebase times at or above it
+
+class Hes_Cpu { // PC Engine CPU core with 8 bank-mapped pages, used for HES music playback
+public:
+	void reset();
+	
+	enum { page_size = 0x2000 }; // equals 1 << page_shift
+	enum { page_shift = 13 };
+	enum { page_count = 8 }; // page_count * page_size spans the 16-bit address space
+	void set_mmr( int reg, int bank );
+	
+	uint8_t const* get_code( hes_addr_t );
+	
+	uint8_t ram [page_size]; // run() indexes this directly for zero-page and stack accesses
+	
+	// not kept updated during a call to run()
+	struct registers_t {
+		uint16_t pc;
+		uint8_t a;
+		uint8_t x;
+		uint8_t y;
+		uint8_t status;
+		uint8_t sp;
+	};
+	registers_t r;
+	
+	// page mapping registers
+	uint8_t mmr [page_count + 1]; // one extra entry because set_mmr() accepts reg == page_count
+	
+	// Set end_time and run CPU from current time. Returns true if any illegal
+	// instructions were encountered.
+	bool run( hes_time_t end_time );
+	
+	// Time of beginning of next instruction to be executed
+	hes_time_t time() const             { return state->time + state->base; }
+	void set_time( hes_time_t t )       { state->time = t - state->base; }
+	void adjust_time( int delta )       { state->time += delta; }
+	
+	hes_time_t irq_time() const         { return irq_time_; }
+	void set_irq_time( hes_time_t );
+	
+	hes_time_t end_time() const         { return end_time_; }
+	void set_end_time( hes_time_t );
+	
+	void end_frame( hes_time_t );
+	
+	// Attempt to execute instruction here results in CPU advancing time to
+	// lesser of irq_time() and end_time() (or end_time() if IRQs are
+	// disabled)
+	enum { idle_addr = 0x1FFF };
+	
+	// Can read this many bytes past end of a page
+	enum { cpu_padding = 8 };
+	
+public:
+	Hes_Cpu() { state = &state_; }
+	enum { irq_inhibit = 0x04 }; // same bit value as st_i in Hes_Cpu.cpp
+private:
+	// noncopyable
+	Hes_Cpu( const Hes_Cpu& );
+	Hes_Cpu& operator = ( const Hes_Cpu& );
+	
+	struct state_t {
+		uint8_t const* code_map [page_count + 1]; // per-page code pointers, written by set_mmr()
+		hes_time_t base; // time() is time + base
+		blargg_long time;
+	};
+	state_t* state; // points to state_ or a local copy within run()
+	state_t state_;
+	hes_time_t irq_time_;
+	hes_time_t end_time_;
+	
+	void set_code_page( int, void const* );
+	inline int update_end_time( hes_time_t end, hes_time_t irq );
+};
+
+inline uint8_t const* Hes_Cpu::get_code( hes_addr_t addr ) // pointer to the mapped code byte at CPU address addr
+{
+	return state->code_map [addr >> page_shift] + addr
+	#if !BLARGG_NONPORTABLE
+		% (unsigned) page_size // portable build: code_map entries are unbiased, so use the in-page offset
+	#endif
+	;
+}
+
+inline int Hes_Cpu::update_end_time( hes_time_t t, hes_time_t irq ) // moves state->base to the next stop time; returns old base minus new base
+{
+	if ( irq < t && !(r.status & irq_inhibit) ) t = irq; // stop at the IRQ instead when it comes first and IRQs are not inhibited
+	int delta = state->base - t;
+	state->base = t;
+	return delta; // callers add this to state->time, which leaves time() unchanged
+}
+
+inline void Hes_Cpu::set_irq_time( hes_time_t t ) // records the new IRQ time and re-derives the stop time
+{
+	state->time += update_end_time( end_time_, (irq_time_ = t) ); // compensate state->time for the change of base
+}
+
+inline void Hes_Cpu::set_end_time( hes_time_t t ) // records the new end time and re-derives the stop time
+{
+	state->time += update_end_time( (end_time_ = t), irq_time_ ); // compensate state->time for the change of base
+}
+
+inline void Hes_Cpu::end_frame( hes_time_t t ) // shifts the time base and pending IRQ/end times back by t
+{
+	assert( state == &state_ ); // must not be called while run() has state pointing at its local copy
+	state_.base -= t;
+	if ( irq_time_ < future_hes_time ) irq_time_ -= t; // values at or above future_hes_time are left untouched
+	if ( end_time_ < future_hes_time ) end_time_ -= t;
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Hes_Emu.cpp b/libraries/game-music-emu/gme/Hes_Emu.cpp
new file mode 100644
index 000000000..818691fdc
--- /dev/null
+++ b/libraries/game-music-emu/gme/Hes_Emu.cpp
@@ -0,0 +1,531 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Hes_Emu.h"
+
+#include "blargg_endian.h"
+#include <string.h>
+
+/* Copyright (C) 2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+const int timer_mask  = 1 << 2; // timer bit in irq.disables and in the $1403 status
+const int vdp_mask    = 1 << 1; // VDP bit in irq.disables and in the $1403 status
+const int i_flag_mask = 1 << 2; // tested against r.status before an IRQ is taken
+const int unmapped    = 0xFF;   // value returned for reads that hit nothing
+
+const long period_60hz = 262 * 455L; // scanlines * clocks per scanline
+
+Hes_Emu::Hes_Emu()
+{
+	timer.raw_load = 0;
+	set_type( gme_hes_type );
+	// one label per APU oscillator
+	static const char* const voice_names [Hes_Apu::osc_count] = {
+		"Wave 1", "Wave 2", "Wave 3", "Wave 4", "Multi 1", "Multi 2"
+	};
+	set_voice_names( voice_names );
+	// four wave-type voices followed by two mixed-type voices
+	static int const voice_types [Hes_Apu::osc_count] = {
+		wave_type  | 0, wave_type  | 1, wave_type  | 2, wave_type  | 3,
+		mixed_type | 0, mixed_type | 1
+	};
+	set_voice_types( voice_types );
+	set_silence_lookahead( 6 );
+	set_gain( 1.11 );
+}
+
+Hes_Emu::~Hes_Emu() {} // empty body
+
+// Clears rom, then defers to Music_Emu::unload().
+void Hes_Emu::unload() {
+	rom.clear();
+	Music_Emu::unload();
+}
+
+// Track info
+
+// Copies one zero-terminated text field to out and returns the address just
+// past it; returns 0 when in is 0 or the bytes do not look like a text field.
+static byte const* copy_field( byte const* in, char* out )
+{
+	if ( !in )
+		return in;
+	
+	// fields are 0x20 bytes, but sometimes 16 bytes longer (ugh)
+	int const field_len = (in [0x1F] && !in [0x2F]) ? 0x30 : 0x20;
+	
+	// text part: bytes below ' ' and 0xFF (which the +1 wraps to 0) are rejected
+	int pos = 0;
+	while ( pos < field_len && in [pos] )
+	{
+		if ( ((in [pos] + 1) & 0xFF) < ' ' + 1 )
+			return 0;
+		pos++;
+	}
+	// everything from the terminator to the end of the field must be zero
+	for ( ; pos < field_len; pos++ )
+		if ( in [pos] )
+			return 0;
+	Gme_File::copy_field_( out, (char const*) in, field_len );
+	return in + field_len;
+}
+
+static void copy_hes_fields( byte const* in, track_info_t* out )
+{
+	if ( *in < ' ' )
+		return; // first byte is below ' ', so nothing is copied
+	// each call returns the start of the next field, or 0, which skips the rest
+	in = copy_field( in, out->game );
+	in = copy_field( in, out->author );
+	in = copy_field( in, out->copyright );
+}
+
+blargg_err_t Hes_Emu::track_info_( track_info_t* out, int ) const {
+	byte const* const fields = rom.begin() + 0x20; // text fields are read 0x20 bytes into rom
+	copy_hes_fields( fields, out );
+	return 0;
+}
+
+// Returns 0 when the data starts with the "HESM" tag, else gme_wrong_file_type.
+static blargg_err_t check_hes_header( void const* header ) {
+	if ( memcmp( header, "HESM", 4 ) == 0 )
+		return 0;
+	return gme_wrong_file_type;
+}
+
+// Reads only the header area and the text fields that follow it.
+struct Hes_File : Gme_Info_
+{
+	struct header_t {
+		char header [Hes_Emu::header_size];
+		char unused [0x20];
+		byte fields [0x30 * 3]; // room for three fields of up to 0x30 bytes
+	} h;
+	
+	Hes_File() { set_type( gme_hes_type ); }
+	
+	blargg_err_t load_( Data_Reader& in ) {
+		assert( offsetof (header_t,fields) == Hes_Emu::header_size + 0x20 );
+		blargg_err_t const err = in.read( &h, sizeof h );
+		if ( !err )
+			return check_hes_header( &h );
+		// running out of data is reported as a wrong file type
+		return (err == in.eof_error ? gme_wrong_file_type : err);
+	}
+	
+	blargg_err_t track_info_( track_info_t* out, int ) const {
+		copy_hes_fields( h.fields, out );
+		return 0;
+	}
+};
+
+static Music_Emu* new_hes_emu () { return BLARGG_NEW Hes_Emu ; } // allocates a Hes_Emu
+static Music_Emu* new_hes_file() { return BLARGG_NEW Hes_File; } // allocates a Hes_File
+// Type record for HES; gme_hes_type is the exported pointer to it.
+static gme_type_t_ const gme_hes_type_ = { "PC Engine", 256, &new_hes_emu, &new_hes_file, "HES", 1 };
+BLARGG_EXPORT extern gme_type_t const gme_hes_type = &gme_hes_type_;
+
+
+// Setup
+
+blargg_err_t Hes_Emu::load_( Data_Reader& in ) // loads header and data into rom, checks the header, sets up the buffer
+{
+	assert( offsetof (header_t,unused [4]) == header_size );
+	RETURN_ERR( rom.load( in, header_size, &header_, unmapped ) );
+	
+	RETURN_ERR( check_hes_header( header_.tag ) );
+	// the remaining header checks only set a warning; loading continues
+	if ( header_.vers != 0 )
+		set_warning( "Unknown file version" );
+	
+	if ( memcmp( header_.data_tag, "DATA", 4 ) )
+		set_warning( "Data header missing" );
+	
+	if ( memcmp( header_.unused, "\0\0\0\0", 4 ) )
+		set_warning( "Unknown header data" );
+	
+	// File spec supports multiple blocks, but I haven't found any, and
+	// many files have bad sizes in the only block, so it's simpler to
+	// just try to load the damn data as best as possible.
+	
+	long addr = get_le32( header_.addr );
+	long size = get_le32( header_.size ); // taken from the file; can be negative where long is 32 bits
+	long const rom_max = 0x100000;
+	if ( addr & ~(rom_max - 1) )
+	{
+		set_warning( "Invalid address" );
+		addr &= rom_max - 1;
+	}
+	if ( (unsigned long) addr + (unsigned long) size > (unsigned long) rom_max ) // unsigned add: no signed overflow
+		set_warning( "Invalid size" );
+	// compare the declared size with what was loaded; size >= 0 keeps the memcmp from reading before rom.begin()
+	if ( size != rom.file_size() )
+	{
+		if ( size >= 0 && size <= rom.file_size() - 4 && !memcmp( rom.begin() + size, "DATA", 4 ) )
+			set_warning( "Multiple DATA not supported" );
+		else if ( size < rom.file_size() )
+			set_warning( "Extra file data" );
+		else
+			set_warning( "Missing file data" );
+	}
+	
+	rom.set_addr( addr );
+	
+	set_voice_count( apu.osc_count );
+	
+	apu.volume( gain() );
+	
+	return setup_buffer( 7159091 );
+}
+
+void Hes_Emu::update_eq( blip_eq_t const& eq ) {
+	// equalization is forwarded to the APU unchanged
+	apu.treble_eq( eq );
+}
+
+void Hes_Emu::set_voice( int i, Blip_Buffer* center, Blip_Buffer* left, Blip_Buffer* right ) {
+	// hand the three output buffers for oscillator i to the APU
+	apu.osc_output( i, center, left, right );
+}
+
+// Emulation
+
+void Hes_Emu::recalc_timer_load() {
+	// reload value: raw_load steps of timer_base each, plus one
+	timer.load = timer.raw_load * timer_base + 1;
+}
+
+void Hes_Emu::set_tempo_( double t ) {
+	// both the play period and the timer step are divided by the tempo
+	play_period = hes_time_t (period_60hz / t);
+	timer_base  = int (1024 / t);
+	recalc_timer_load(); // timer.load is derived from timer_base
+}
+
+blargg_err_t Hes_Emu::start_track_( int track ) // resets memory, APU, CPU and timer/VDP/IRQ state for a track
+{
+	RETURN_ERR( Classic_Emu::start_track_( track ) );
+	
+	memset( ram, 0, sizeof ram ); // some HES music relies on zero fill
+	memset( sgx, 0, sizeof sgx );
+	
+	apu.reset();
+	cpu::reset();
+	// initial bank for each MMR slot comes from the file header
+	for ( unsigned i = 0; i < sizeof header_.banks; i++ )
+		set_mmr( i, header_.banks [i] );
+	set_mmr( page_count, 0xFF ); // unmapped beyond end of address space
+	// both IRQ sources start disabled with nothing scheduled
+	irq.disables  = timer_mask | vdp_mask;
+	irq.timer     = future_hes_time;
+	irq.vdp       = future_hes_time;
+	// NOTE(review): timer.load below is still the value from before raw_load changed; recalc_timer_load() runs later
+	timer.enabled = false;
+	timer.raw_load= 0x80;
+	timer.count   = timer.load;
+	timer.fired   = false;
+	timer.last_time = 0;
+	
+	vdp.latch     = 0;
+	vdp.control   = 0;
+	vdp.next_vbl  = 0;
+	// idle_addr - 1 is stored at $1FE/$1FF with sp = $FD (presumably so a return from init lands on idle_addr - TODO confirm)
+	ram [0x1FF] = (idle_addr - 1) >> 8;
+	ram [0x1FE] = (idle_addr - 1) & 0xFF;
+	r.sp = 0xFD;
+	r.pc = get_le16( header_.init_addr );
+	r.a  = track;
+	
+	recalc_timer_load();
+	last_frame_hook = 0;
+	
+	return 0;
+}
+
+// Hardware
+
+void Hes_Emu::cpu_write_vdp( int addr, int data )
+{
+	// address 0 selects a register: only the low five bits are latched
+	if ( addr == 0 )
+	{
+		vdp.latch = data & 0x1F;
+		return;
+	}
+	if ( addr == 3 )
+	{
+		debug_printf( "VDP MSB not supported: $%02X <- $%02X\n", vdp.latch, data );
+		return;
+	}
+	if ( addr != 2 )
+		return;
+	// address 2: only latched register 5 is emulated
+	if ( vdp.latch != 5 )
+	{
+		debug_printf( "VDP not supported: $%02X <- $%02X\n", vdp.latch, data );
+		return;
+	}
+	
+	if ( data & 0x04 )
+		set_warning( "Scanline interrupt unsupported" );
+	run_until( time() );
+	vdp.control = data;
+	irq_changed();
+}
+
+void Hes_Emu::cpu_write_( hes_addr_t addr, int data ) // handles writes to APU, VDP, timer and IRQ registers
+{
+	if ( unsigned (addr - apu.start_addr) <= apu.end_addr - apu.start_addr ) // one unsigned compare covers start_addr..end_addr
+	{
+		GME_APU_HOOK( this, addr - apu.start_addr, data );
+		// avoid going way past end when a long block xfer is writing to I/O space
+		hes_time_t t = min( time(), end_time() + 8 );
+		apu.write_data( t, addr, data );
+		return;
+	}
+	// the remaining registers are matched by exact address
+	hes_time_t time = this->time();
+	switch ( addr )
+	{
+	case 0x0000:
+	case 0x0002:
+	case 0x0003:
+		cpu_write_vdp( addr, data );
+		return;
+	// timer reload value: low 7 bits, stored plus one; the count restarts
+	case 0x0C00: {
+		run_until( time );
+		timer.raw_load = (data & 0x7F) + 1;
+		recalc_timer_load();
+		timer.count = timer.load;
+		break;
+	}
+	// timer enable (bit 0); a change to enabled restarts the count
+	case 0x0C01:
+		data &= 1;
+		if ( timer.enabled == data )
+			return;
+		run_until( time );
+		timer.enabled = data;
+		if ( data )
+			timer.count = timer.load;
+		break;
+	// interrupt disable mask
+	case 0x1402:
+		run_until( time );
+		irq.disables = data;
+		if ( (data & 0xF8) && (data & 0xF8) != 0xF8 ) // flag questionable values
+			debug_printf( "Int mask: $%02X\n", data );
+		break;
+	// clears timer.fired and reloads a running timer (presumably the timer IRQ acknowledge)
+	case 0x1403:
+		run_until( time );
+		if ( timer.enabled )
+			timer.count = timer.load;
+		timer.fired = false;
+		break;
+	// debug builds return early for everything below; with NDEBUG those addresses fall out of the switch to irq_changed()
+#ifndef NDEBUG
+	case 0x1000: // I/O port
+	case 0x0402: // palette
+	case 0x0403:
+	case 0x0404:
+	case 0x0405:
+		return;
+		
+	default:
+		debug_printf( "unmapped write $%04X <- $%02X\n", addr, data );
+		return;
+#endif
+	}
+	// a timer or IRQ register changed, so recompute the next IRQ time
+	irq_changed();
+}
+
+int Hes_Emu::cpu_read_( hes_addr_t addr ) // handles reads of VDP, timer and IRQ registers
+{
+	hes_time_t time = this->time();
+	addr &= page_size - 1; // decode using only the offset within the page
+	switch ( addr )
+	{
+	case 0x0000: // returns 0x20 and clears the VDP IRQ once it is due, else 0
+		if ( irq.vdp > time )
+			return 0;
+		irq.vdp = future_hes_time;
+		run_until( time );
+		irq_changed();
+		return 0x20;
+		
+	case 0x0002:
+	case 0x0003:
+		debug_printf( "VDP read not supported: %d\n", addr );
+		return 0;
+	// timer count in timer_base units; $0C01 falls through to the same read
+	case 0x0C01:
+		//return timer.enabled; // TODO: remove?
+	case 0x0C00:
+		run_until( time );
+		debug_printf( "Timer count read\n" );
+		return (unsigned) (timer.count - 1) / timer_base;
+	// interrupt disable mask as last stored
+	case 0x1402:
+		return irq.disables;
+	// IRQ status: one bit per source whose IRQ time has been reached
+	case 0x1403:
+		{
+			int status = 0;
+			if ( irq.timer <= time ) status |= timer_mask;
+			if ( irq.vdp   <= time ) status |= vdp_mask;
+			return status;
+		}
+		
+	#ifndef NDEBUG
+		case 0x1000: // I/O port
+		case 0x180C: // CD-ROM
+		case 0x180D:
+			break;
+		
+		default:
+			debug_printf( "unmapped read  $%04X\n", addr );
+	#endif
+	}
+	// anything not returned above reads as unmapped
+	return unmapped;
+}
+
+// see hes_cpu_io.h for core read/write functions
+
+// Emulation
+
+void Hes_Emu::run_until( hes_time_t present )
+{
+	// step the next vertical-blank time forward until it is not before present
+	while ( vdp.next_vbl < present )
+		vdp.next_vbl += play_period;
+	
+	hes_time_t const delta = present - timer.last_time;
+	if ( delta <= 0 )
+		return; // timer has already been advanced this far
+	timer.last_time = present;
+	if ( !timer.enabled )
+		return;
+	// count down; when it runs out, add the reload value once
+	timer.count -= delta;
+	if ( timer.count <= 0 )
+		timer.count += timer.load;
+}
+
+void Hes_Emu::irq_changed()
+{
+	hes_time_t const now = time();
+	
+	// an IRQ that is already due (<= now) is left alone; otherwise reschedule it
+	if ( irq.timer > now )
+	{
+		bool const armed = timer.enabled && !timer.fired;
+		irq.timer = armed ? now + timer.count : future_hes_time;
+	}
+	
+	// the VDP IRQ is scheduled only while bit 3 of vdp.control is set
+	if ( irq.vdp > now )
+		irq.vdp = (vdp.control & 0x08) ? vdp.next_vbl : future_hes_time;
+	
+	// earliest time among the sources that are not disabled
+	hes_time_t next = future_hes_time;
+	if ( (irq.disables & timer_mask) == 0 )
+		next = irq.timer;
+	if ( (irq.disables & vdp_mask) == 0 )
+		next = min( next, irq.vdp );
+	
+	set_irq_time( next );
+}
+
+int Hes_Emu::cpu_done()
+{
+	check( time() >= end_time() ||
+			(!(r.status & i_flag_mask) && time() >= irq_time()) );
+	// returns 0x0A for a due timer IRQ, 0x08 for a due VDP IRQ, 0 otherwise (always 0 while the I flag is set)
+	if ( !(r.status & i_flag_mask) )
+	{
+		hes_time_t present = time();
+		// the timer is checked first, so it is reported when both are due
+		if ( irq.timer <= present && !(irq.disables & timer_mask) )
+		{
+			timer.fired = true;
+			irq.timer = future_hes_time;
+			irq_changed(); // overkill, but not worth writing custom code
+			#if GME_FRAME_HOOK_DEFINED
+			{
+				unsigned const threshold = period_60hz / 30;
+				unsigned long elapsed = present - last_frame_hook;
+				if ( elapsed - period_60hz + threshold / 2 < threshold ) // only when about one period_60hz has passed
+				{
+					last_frame_hook = present;
+					GME_FRAME_HOOK( this );
+				}
+			}
+			#endif
+			return 0x0A;
+		}
+		// irq.vdp is not cleared here: the acknowledge code below is commented out
+		if ( irq.vdp <= present && !(irq.disables & vdp_mask) )
+		{
+			// work around for bugs with music not acknowledging VDP
+			//run_until( present );
+			//irq.vdp = future_hes_time;
+			//irq_changed();
+			#if GME_FRAME_HOOK_DEFINED
+				last_frame_hook = present;
+				GME_FRAME_HOOK( this );
+			#endif
+			return 0x08;
+		}
+	}
+	return 0;
+}
+
+// Moves time back by delta, clamping at 0; values at or beyond
+// future_hes_time are left unchanged.
+static void adjust_time( blargg_long& time, hes_time_t delta )
+{
+	if ( time >= future_hes_time )
+		return;
+	time -= delta;
+	time = (time < 0 ? 0 : time);
+}
+
+blargg_err_t Hes_Emu::run_clocks( blip_time_t& duration_, int )
+{
+	blip_time_t const frame_len = duration_; // local copy of the requested length
+	// run the CPU for the whole frame; a true result is reported as a warning only
+	if ( cpu::run( frame_len ) )
+		set_warning( "Emulation error (illegal instruction)" );
+	
+	check( time() >= frame_len );
+	//check( time() - frame_len < 20 ); // Txx instruction could cause going way over
+	
+	run_until( frame_len );
+	
+	// end of frame: shift every stored time back by the frame length
+	timer.last_time -= frame_len;
+	vdp.next_vbl    -= frame_len;
+	#if GME_FRAME_HOOK_DEFINED
+		last_frame_hook -= frame_len;
+	#endif
+	cpu::end_frame( frame_len );
+	::adjust_time( irq.timer, frame_len );
+	::adjust_time( irq.vdp,   frame_len );
+	apu.end_frame( frame_len );
+	
+	return 0;
+}
diff --git a/libraries/game-music-emu/gme/Hes_Emu.h b/libraries/game-music-emu/gme/Hes_Emu.h
new file mode 100644
index 000000000..08c1370d4
--- /dev/null
+++ b/libraries/game-music-emu/gme/Hes_Emu.h
@@ -0,0 +1,94 @@
+// TurboGrafx-16/PC Engine HES music file emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef HES_EMU_H
+#define HES_EMU_H
+
+#include "Classic_Emu.h"
+#include "Hes_Apu.h"
+#include "Hes_Cpu.h"
+
+class Hes_Emu : private Hes_Cpu, public Classic_Emu {
+	typedef Hes_Cpu cpu;
+public:
+	// HES file header
+	enum { header_size = 0x20 };
+	struct header_t
+	{
+		byte tag [4]; // "HESM" expected
+		byte vers; // non-zero produces a warning on load
+		byte first_track;
+		byte init_addr [2]; // little-endian; loaded into pc when a track starts
+		byte banks [8]; // initial bank for each of the first 8 MMR slots
+		byte data_tag [4]; // "DATA" expected
+		byte size [4]; // little-endian data size
+		byte addr [4]; // little-endian load address
+		byte unused [4]; // non-zero produces a warning on load
+	};
+	
+	// Header for currently loaded file
+	header_t const& header() const { return header_; }
+	
+	static gme_type_t static_type() { return gme_hes_type; }
+
+public:
+	Hes_Emu();
+	~Hes_Emu();
+protected:
+	blargg_err_t track_info_( track_info_t*, int track ) const;
+	blargg_err_t load_( Data_Reader& );
+	blargg_err_t start_track_( int );
+	blargg_err_t run_clocks( blip_time_t&, int );
+	void set_tempo_( double );
+	void set_voice( int, Blip_Buffer*, Blip_Buffer*, Blip_Buffer* );
+	void update_eq( blip_eq_t const& );
+	void unload();
+public: private: friend class Hes_Cpu;
+	byte* write_pages [page_count + 1]; // 0 if unmapped or I/O space
+	
+	int cpu_read_( hes_addr_t );
+	int cpu_read( hes_addr_t );
+	void cpu_write_( hes_addr_t, int data );
+	void cpu_write( hes_addr_t, int );
+	void cpu_write_vdp( int addr, int data );
+	byte const* cpu_set_mmr( int page, int bank );
+	int cpu_done();
+private:
+	Rom_Data<page_size> rom;
+	header_t header_;
+	hes_time_t play_period; // period_60hz divided by the tempo
+	hes_time_t last_frame_hook;
+	int timer_base; // 1024 divided by the tempo
+	
+	struct {
+		hes_time_t last_time; // time the timer was last advanced to
+		blargg_long count; // remaining count; reloaded from load
+		blargg_long load; // raw_load * timer_base + 1
+		int raw_load; // (data & 0x7F) + 1 from the last $0C00 write
+		byte enabled; // bit 0 of the last $0C01 write
+		byte fired; // set when the timer IRQ is taken, cleared by a $1403 write
+	} timer;
+	
+	struct {
+		hes_time_t next_vbl; // advanced in play_period steps
+		byte latch; // low 5 bits of the last write to VDP address 0
+		byte control; // last value written to VDP register 5
+	} vdp;
+	
+	struct {
+		hes_time_t timer; // time the timer IRQ becomes due, or future_hes_time
+		hes_time_t vdp; // time the VDP IRQ becomes due, or future_hes_time
+		byte disables; // timer_mask / vdp_mask bits; a set bit disables that source
+	} irq;
+	
+	void recalc_timer_load();
+	
+	// large items
+	Hes_Apu apu;
+	byte sgx [3 * page_size + cpu_padding];
+	
+	void irq_changed();
+	void run_until( hes_time_t );
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Kss_Cpu.cpp b/libraries/game-music-emu/gme/Kss_Cpu.cpp
new file mode 100644
index 000000000..f3857680f
--- /dev/null
+++ b/libraries/game-music-emu/gme/Kss_Cpu.cpp
@@ -0,0 +1,1700 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+/*
+Last validated with zexall 2006.11.14 2:19 PM
+* Doesn't implement the R register or immediate interrupt after EI.
+* Address wrap-around isn't completely correct, but is prevented from crashing emulator.
+*/
+
+#include "Kss_Cpu.h"
+
+#include "blargg_endian.h"
+#include <string.h>
+
+//#include "z80_cpu_log.h"
+
+/* Copyright (C) 2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#define SYNC_TIME()     (void) (s.time = s_time) // store the cached time into s
+#define RELOAD_TIME()   (void) (s_time = s.time) // refresh the cached time from s
+
+// Callbacks to emulator
+// (the cpu argument of each macro is ignored; `this` is passed instead)
+#define CPU_OUT( cpu, addr, data, time )\
+	kss_cpu_out( this, time, addr, data )
+
+#define CPU_IN( cpu, addr, time )\
+	kss_cpu_in( this, time, addr )
+// writes sync the time into s first; the time argument itself is not used
+#define CPU_WRITE( cpu, addr, data, time )\
+	(SYNC_TIME(), kss_cpu_write( this, addr, data ))
+
+#include "blargg_source.h"
+
+// flags, named with hex value for clarity
+int const S80 = 0x80; // tested by MINUS
+int const Z40 = 0x40; // tested by ZERO
+int const F20 = 0x20; // copied from result/operand bits (presumably the undocumented bit 5 flag)
+int const H10 = 0x10; // presumably half-carry
+int const F08 = 0x08; // copied from result/operand bits (presumably the undocumented bit 3 flag)
+int const V04 = 0x04; // presumably overflow; shares its bit with P04
+int const P04 = 0x04; // tested by EVEN (parity)
+int const N02 = 0x02; // set for subtract opcodes
+int const C01 = 0x01; // tested by CARRY
+// szpc table lookups; the C variants clear the parity bit
+#define SZ28P( n )  szpc [n]
+#define SZ28PC( n ) szpc [n]
+#define SZ28C( n )  (szpc [n] & ~P04)
+#define SZ28( n )   SZ28C( n )
+
+#define SET_R( n )  (void) (r.r = n)
+#define GET_R()     (r.r)
+
+Kss_Cpu::Kss_Cpu()
+{
+	state = &state_;
+	// szpc [v]: S80/F20/F08 bits of v, plus P04 when v has even parity; the upper half adds C01
+	for ( int value = 0; value < 0x100; value++ )
+	{
+		int parity_even = 1;
+		for ( int bits = value; bits != 0; bits >>= 1 )
+			parity_even ^= bits & 1;
+		int const f = (value & (S80 | F20 | F08)) | (parity_even ? P04 : 0);
+		szpc [value] = f;
+		szpc [value + 0x100] = f | C01;
+	}
+	szpc [0x000] |= Z40;
+	szpc [0x100] |= Z40;
+}
+
+inline void Kss_Cpu::set_page( int i, void* write, void const* read ) {
+	// both pointers are biased by the page's own KSS_CPU_PAGE_OFFSET value
+	blargg_long const bias = KSS_CPU_PAGE_OFFSET( i * (blargg_long) page_size );
+	state->write [i] = static_cast<byte      *>( write ) - bias;
+	state->read  [i] = static_cast<byte const*>( read  ) - bias;
+}
+
+void Kss_Cpu::reset( void* unmapped_write, void const* unmapped_read )
+{
+	check( state == &state_ );
+	state = &state_;
+	end_time_   = 0;
+	state_.base = 0;
+	state_.time = 0;
+	// every page, including the extra one at index page_count, starts on the unmapped buffers
+	for ( int page = page_count; page >= 0; page-- )
+		set_page( page, unmapped_write, unmapped_read );
+	
+	memset( &r, 0, sizeof r ); // all registers cleared
+}
+
+void Kss_Cpu::map_mem( unsigned addr, blargg_ulong size, void* write, void const* read )
+{
+	// address range must begin and end on page boundaries
+	require( addr % page_size == 0 );
+	require( size % page_size == 0 );
+	unsigned const first = addr / page_size;
+	unsigned const count = size / page_size;
+	for ( unsigned n = 0; n < count; n++ )
+	{
+		blargg_long const delta = n * (blargg_long) page_size; // byte offset of page n within the buffers
+		set_page( first + n, (byte*) write + delta, (byte const*) read + delta );
+	}
+}
+
+#define TIME                        (s_time + s.base)
+#define RW_MEM( addr, rw )          (s.rw [(addr) >> page_shift] [KSS_CPU_PAGE_OFFSET( addr )])
+#define READ_PROG( addr )           RW_MEM( addr, read )
+#define READ( addr )                READ_PROG( addr )
+//#define WRITE( addr, data )       (void) (RW_MEM( addr, write ) = data)
+#define WRITE( addr, data )         CPU_WRITE( this, addr, data, TIME )
+#define READ_WORD( addr )           GET_LE16( &READ( addr ) )
+#define WRITE_WORD( addr, data )    SET_LE16( &RW_MEM( addr, write ), data )
+#define IN( addr )                  CPU_IN( this, addr, TIME )
+#define OUT( addr, data )           CPU_OUT( this, addr, data, TIME )
+// note: WRITE goes through the CPU_WRITE callback, while WRITE_WORD stores through the write page table directly
+#if BLARGG_BIG_ENDIAN
+	#define R8( n, offset ) ((r8_ - offset) [n]) 
+#elif BLARGG_LITTLE_ENDIAN
+	#define R8( n, offset ) ((r8_ - offset) [(n) ^ 1]) 
+#else
+	#error "Byte order of CPU must be known"
+#endif
+// R8 indexes the 8-bit register array; little-endian builds flip the low index bit
+//#define R16( n, shift, offset )   (r16_ [((n) >> shift) - (offset >> shift)])
+
+// help compiler see that it can just adjust stack offset, saving an extra instruction
+#define R16( n, shift, offset )\
+	(*(uint16_t*) ((char*) r16_ - (offset >> (shift - 1)) + ((n) >> (shift - 1))))
+// CASEn( ... ) expands to n consecutive case labels built from hex digit pairs
+#define CASE5( a, b, c, d, e          ) case 0x##a:case 0x##b:case 0x##c:case 0x##d:case 0x##e
+#define CASE6( a, b, c, d, e, f       ) CASE5( a, b, c, d, e       ): case 0x##f
+#define CASE7( a, b, c, d, e, f, g    ) CASE6( a, b, c, d, e, f    ): case 0x##g
+#define CASE8( a, b, c, d, e, f, g, h ) CASE7( a, b, c, d, e, f, g ): case 0x##h
+
+// two timings packed per byte: high four bits are $ED time - 8, low four bits are $DD/$FD time - 8 (presumably indexed by the opcode after the prefix - usage is outside this excerpt)
+static byte const ed_dd_timing [0x100] = {
+//0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,
+0x00,0x06,0x0C,0x02,0x00,0x00,0x03,0x00,0x00,0x07,0x0C,0x02,0x00,0x00,0x03,0x00,
+0x00,0x00,0x00,0x00,0x0F,0x0F,0x0B,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,
+0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,
+0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,
+0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0xA0,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0xA0,
+0x4B,0x4B,0x7B,0xCB,0x0B,0x6B,0x00,0x0B,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x00,
+0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,
+0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,
+0x80,0x80,0x80,0x80,0x00,0x00,0x0B,0x00,0x80,0x80,0x80,0x80,0x00,0x00,0x0B,0x00,
+0xD0,0xD0,0xD0,0xD0,0x00,0x00,0x0B,0x00,0xD0,0xD0,0xD0,0xD0,0x00,0x00,0x0B,0x00,
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0F,0x00,0x00,0x00,0x00,
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+0x00,0x06,0x00,0x0F,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,
+};
+
+bool Kss_Cpu::run( cpu_time_t end_time )
+{
+	set_end_time( end_time );
+	state_t s = this->state_;
+	this->state = &s;
+	bool warning = false;
+	
+	union {
+		regs_t rg;
+		pairs_t rp;
+		uint8_t r8_ [8]; // indexed
+		uint16_t r16_ [4];
+	};
+	rg = this->r.b;
+	
+	cpu_time_t s_time = s.time;
+	uint_fast32_t pc = r.pc;
+	uint_fast32_t sp = r.sp;
+	uint_fast32_t ix = r.ix; // TODO: keep in memory for direct access?
+	uint_fast32_t iy = r.iy;
+	int flags = r.b.flags;
+	
+	goto loop;
+jr_not_taken:
+	s_time -= 5;
+	goto loop;
+call_not_taken:
+	s_time -= 7; 
+jp_not_taken:
+	pc += 2;
+loop:
+	
+	check( (unsigned long) pc < 0x10000 );
+	check( (unsigned long) sp < 0x10000 );
+	check( (unsigned) flags < 0x100 );
+	check( (unsigned) ix < 0x10000 );
+	check( (unsigned) iy < 0x10000 );
+	
+	uint8_t const* instr = s.read [pc >> page_shift];
+#define GET_ADDR()  GET_LE16( instr )
+	
+	uint_fast8_t opcode;
+	
+	// TODO: eliminate this special case
+	#if BLARGG_NONPORTABLE
+		opcode = instr [pc];
+		pc++;
+		instr += pc;
+	#else
+		instr += KSS_CPU_PAGE_OFFSET( pc );
+		opcode = *instr++;
+		pc++;
+	#endif
+	
+	static byte const base_timing [0x100] = {
+	//   0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+		 4,10, 7, 6, 4, 4, 7, 4, 4,11, 7, 6, 4, 4, 7, 4, // 0
+		13,10, 7, 6, 4, 4, 7, 4,12,11, 7, 6, 4, 4, 7, 4, // 1
+		12,10,16, 6, 4, 4, 7, 4,12,11,16, 6, 4, 4, 7, 4, // 2
+		12,10,13, 6,11,11,10, 4,12,11,13, 6, 4, 4, 7, 4, // 3
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 4
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 5
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 6
+		 7, 7, 7, 7, 7, 7, 4, 7, 4, 4, 4, 4, 4, 4, 7, 4, // 7
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 8
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 9
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // A
+		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // B
+		11,10,10,10,17,11, 7,11,11,10,10, 8,17,17, 7,11, // C
+		11,10,10,11,17,11, 7,11,11, 4,10,11,17, 8, 7,11, // D
+		11,10,10,19,17,11, 7,11,11, 4,10, 4,17, 8, 7,11, // E
+		11,10,10, 4,17,11, 7,11,11, 6,10, 4,17, 8, 7,11, // F
+	};
+	
+	uint_fast16_t data;
+	data = base_timing [opcode];
+	if ( (s_time += data) >= 0 )
+		goto possibly_out_of_time;
+almost_out_of_time:
+	
+	data = READ_PROG( pc );
+	
+	#ifdef Z80_CPU_LOG_H
+		//log_opcode( opcode, READ_PROG( pc ) );
+		z80_log_regs( rg.a, rp.bc, rp.de, rp.hl, sp, ix, iy );
+		z80_cpu_log( "new", pc - 1, opcode, READ_PROG( pc ),
+				READ_PROG( pc + 1 ), READ_PROG( pc + 2 ) );
+	#endif
+	
+	switch ( opcode )
+	{
+possibly_out_of_time:
+		if ( s_time < (int) data )
+			goto almost_out_of_time;
+		s_time -= data;
+		goto out_of_time;
+
+// Common
+
+	case 0x00: // NOP
+	CASE7( 40, 49, 52, 5B, 64, 6D, 7F ): // LD B,B etc.
+		goto loop;
+	
+	case 0x08:{// EX AF,AF'
+		int temp = r.alt.b.a;
+		r.alt.b.a = rg.a;
+		rg.a = temp;
+		
+		temp = r.alt.b.flags;
+		r.alt.b.flags = flags;
+		flags = temp;
+		goto loop;
+	}
+	
+	case 0xD3: // OUT (imm),A
+		pc++;
+		OUT( data + rg.a * 0x100, rg.a );
+		goto loop;
+		
+	case 0x2E: // LD L,imm
+		pc++;
+		rg.l = data;
+		goto loop;
+	
+	case 0x3E: // LD A,imm
+		pc++;
+		rg.a = data;
+		goto loop;
+	
+	case 0x3A:{// LD A,(addr)
+		uint_fast16_t addr = GET_ADDR();
+		pc += 2;
+		rg.a = READ( addr );
+		goto loop;
+	}
+	
+// Conditional
+
+#define ZERO    (flags & Z40)
+#define CARRY   (flags & C01)
+#define EVEN    (flags & P04)
+#define MINUS   (flags & S80)
+
+// JR
+// TODO: more efficient way to handle negative branch that wraps PC around
+#define JR( cond ) {\
+	int offset = (int8_t) data;\
+	pc++;\
+	if ( !(cond) )\
+		goto jr_not_taken;\
+	pc = uint16_t (pc + offset);\
+	goto loop;\
+}
+	
+	case 0x20: JR( !ZERO  ) // JR NZ,disp
+	case 0x28: JR(  ZERO  ) // JR Z,disp
+	case 0x30: JR( !CARRY ) // JR NC,disp
+	case 0x38: JR(  CARRY ) // JR C,disp
+	case 0x18: JR(  true  ) // JR disp
+
+	case 0x10:{// DJNZ disp
+		int temp = rg.b - 1;
+		rg.b = temp;
+		JR( temp )
+	}
+	
+// JP
+#define JP( cond )  if ( !(cond) ) goto jp_not_taken; pc = GET_ADDR(); goto loop;
+	
+	case 0xC2: JP( !ZERO  ) // JP NZ,addr
+	case 0xCA: JP(  ZERO  ) // JP Z,addr
+	case 0xD2: JP( !CARRY ) // JP NC,addr
+	case 0xDA: JP(  CARRY ) // JP C,addr
+	case 0xE2: JP( !EVEN  ) // JP PO,addr
+	case 0xEA: JP(  EVEN  ) // JP PE,addr
+	case 0xF2: JP( !MINUS ) // JP P,addr
+	case 0xFA: JP(  MINUS ) // JP M,addr
+	
+	case 0xC3: // JP addr
+		pc = GET_ADDR();
+		goto loop;
+	
+	case 0xE9: // JP HL
+		pc = rp.hl;
+		goto loop;
+
+// RET
+#define RET( cond ) if ( cond ) goto ret_taken; s_time -= 6; goto loop;
+	
+	case 0xC0: RET( !ZERO  ) // RET NZ
+	case 0xC8: RET(  ZERO  ) // RET Z
+	case 0xD0: RET( !CARRY ) // RET NC
+	case 0xD8: RET(  CARRY ) // RET C
+	case 0xE0: RET( !EVEN  ) // RET PO
+	case 0xE8: RET(  EVEN  ) // RET PE
+	case 0xF0: RET( !MINUS ) // RET P
+	case 0xF8: RET(  MINUS ) // RET M
+	
+	case 0xC9: // RET
+	ret_taken:
+		pc = READ_WORD( sp );
+		sp = uint16_t (sp + 2);
+		goto loop;
+	
+// CALL
+#define CALL( cond ) if ( cond ) goto call_taken; goto call_not_taken;
+
+	case 0xC4: CALL( !ZERO  ) // CALL NZ,addr
+	case 0xCC: CALL(  ZERO  ) // CALL Z,addr
+	case 0xD4: CALL( !CARRY ) // CALL NC,addr
+	case 0xDC: CALL(  CARRY ) // CALL C,addr
+	case 0xE4: CALL( !EVEN  ) // CALL PO,addr
+	case 0xEC: CALL(  EVEN  ) // CALL PE,addr
+	case 0xF4: CALL( !MINUS ) // CALL P,addr
+	case 0xFC: CALL(  MINUS ) // CALL M,addr
+	
+	case 0xCD:{// CALL addr
+	call_taken:
+		uint_fast16_t addr = pc + 2;
+		pc = GET_ADDR();
+		sp = uint16_t (sp - 2);
+		WRITE_WORD( sp, addr );
+		goto loop;
+	}
+	
+	case 0xFF: // RST
+		if ( pc > idle_addr )
+			goto hit_idle_addr;
+	CASE7( C7, CF, D7, DF, E7, EF, F7 ):
+		data = pc;
+		pc = opcode & 0x38;
+		goto push_data;
+
+// PUSH/POP
+	case 0xF5: // PUSH AF
+		data = rg.a * 0x100u + flags;
+		goto push_data;
+	
+	case 0xC5: // PUSH BC
+	case 0xD5: // PUSH DE
+	case 0xE5: // PUSH HL
+		data = R16( opcode, 4, 0xC5 );
+	push_data:
+		sp = uint16_t (sp - 2);
+		WRITE_WORD( sp, data );
+		goto loop;
+	
+	case 0xF1: // POP AF
+		flags = READ( sp );
+		rg.a = READ( sp + 1 );
+		sp = uint16_t (sp + 2);
+		goto loop;
+	
+	case 0xC1: // POP BC
+	case 0xD1: // POP DE
+	case 0xE1: // POP HL
+		R16( opcode, 4, 0xC1 ) = READ_WORD( sp );
+		sp = uint16_t (sp + 2);
+		goto loop;
+	
+// ADC/ADD/SBC/SUB
+	case 0x96: // SUB (HL)
+	case 0x86: // ADD (HL)
+		flags &= ~C01;
+	case 0x9E: // SBC (HL)
+	case 0x8E: // ADC (HL)
+		data = READ( rp.hl );
+		goto adc_data;
+	
+	case 0xD6: // SUB A,imm
+	case 0xC6: // ADD imm
+		flags &= ~C01;
+	case 0xDE: // SBC A,imm
+	case 0xCE: // ADC imm
+		pc++;
+		goto adc_data;
+	
+	CASE7( 90, 91, 92, 93, 94, 95, 97 ): // SUB r
+	CASE7( 80, 81, 82, 83, 84, 85, 87 ): // ADD r
+		flags &= ~C01;
+	CASE7( 98, 99, 9A, 9B, 9C, 9D, 9F ): // SBC r
+	CASE7( 88, 89, 8A, 8B, 8C, 8D, 8F ): // ADC r
+		data = R8( opcode & 7, 0 );
+	adc_data: {
+		int result = data + (flags & C01);
+		data ^= rg.a;
+		flags = opcode >> 3 & N02; // bit 4 is set in subtract opcodes
+		if ( flags )
+			result = -result;
+		result += rg.a;
+		data ^= result;
+		flags |=(data & H10) |
+				((data - -0x80) >> 6 & V04) |
+				SZ28C( result & 0x1FF );
+		rg.a = result;
+		goto loop;
+	}
+
+// CP
+	case 0xBE: // CP (HL)
+		data = READ( rp.hl );
+		goto cp_data;
+	
+	case 0xFE: // CP imm
+		pc++;
+		goto cp_data;
+	
+	CASE7( B8, B9, BA, BB, BC, BD, BF ): // CP r
+		data = R8( opcode, 0xB8 );
+	cp_data: {
+		int result = rg.a - data;
+		flags = N02 | (data & (F20 | F08)) | (result >> 8 & C01);
+		data ^= rg.a;
+		flags |=(((result ^ rg.a) & data) >> 5 & V04) |
+				(((data & H10) ^ result) & (S80 | H10));
+		if ( (uint8_t) result )
+			goto loop;
+		flags |= Z40;
+		goto loop;
+	}
+	
+// ADD HL,rp
+	
+	case 0x39: // ADD HL,SP
+		data = sp;
+		goto add_hl_data;
+	
+	case 0x09: // ADD HL,BC
+	case 0x19: // ADD HL,DE
+	case 0x29: // ADD HL,HL
+		data = R16( opcode, 4, 0x09 );
+	add_hl_data: {
+		blargg_ulong sum = rp.hl + data;
+		data ^= rp.hl;
+		rp.hl = sum;
+		flags = (flags & (S80 | Z40 | V04)) |
+				(sum >> 16) |
+				(sum >> 8 & (F20 | F08)) |
+				((data ^ sum) >> 8 & H10);
+		goto loop;
+	}
+	
+	case 0x27:{// DAA
+		int a = rg.a;
+		if ( a > 0x99 )
+			flags |= C01;
+		
+		int adjust = 0x60 & -(flags & C01);
+		
+		if ( flags & H10 || (a & 0x0F) > 9 )
+			adjust |= 0x06;
+		
+		if ( flags & N02 )
+			adjust = -adjust;
+		a += adjust;
+		
+		flags = (flags & (C01 | N02)) |
+				((rg.a ^ a) & H10) |
+				SZ28P( (uint8_t) a );
+		rg.a = a;
+		goto loop;
+	}
+	/*
+	case 0x27:{// DAA
+		// more optimized, but probably not worth the obscurity
+		int f = (rg.a + (0xFF - 0x99)) >> 8 | flags; // (a > 0x99 ? C01 : 0) | flags
+		int adjust = 0x60 & -(f & C01); // f & C01 ? 0x60 : 0
+		
+		if ( (((rg.a + (0x0F - 9)) ^ rg.a) | f) & H10 ) // flags & H10 || (rg.a & 0x0F) > 9
+			adjust |= 0x06;
+		
+		if ( f & N02 )
+			adjust = -adjust;
+		int a = rg.a + adjust;
+		
+		flags = (f & (N02 | C01)) | ((rg.a ^ a) & H10) | SZ28P( (uint8_t) a );
+		rg.a = a;
+		goto loop;
+	}
+	*/
+	
+// INC/DEC
+	case 0x34: // INC (HL)
+		data = READ( rp.hl ) + 1;
+		WRITE( rp.hl, data );
+		goto inc_set_flags;
+	
+	CASE7( 04, 0C, 14, 1C, 24, 2C, 3C ): // INC r
+		data = ++R8( opcode >> 3, 0 );
+	inc_set_flags:
+		flags = (flags & C01) |
+				(((data & 0x0F) - 1) & H10) |
+				SZ28( (uint8_t) data );
+		if ( data != 0x80 )
+			goto loop;
+		flags |= V04;
+		goto loop;
+	
+	case 0x35: // DEC (HL)
+		data = READ( rp.hl ) - 1;
+		WRITE( rp.hl, data );
+		goto dec_set_flags;
+	
+	CASE7( 05, 0D, 15, 1D, 25, 2D, 3D ): // DEC r
+		data = --R8( opcode >> 3, 0 );
+	dec_set_flags:
+		flags = (flags & C01) | N02 |
+				(((data & 0x0F) + 1) & H10) |
+				SZ28( (uint8_t) data );
+		if ( data != 0x7F )
+			goto loop;
+		flags |= V04;
+		goto loop;
+
+	case 0x03: // INC BC
+	case 0x13: // INC DE
+	case 0x23: // INC HL
+		R16( opcode, 4, 0x03 )++;
+		goto loop;
+	
+	case 0x33: // INC SP
+		sp = uint16_t (sp + 1);
+		goto loop;
+	
+	case 0x0B: // DEC BC
+	case 0x1B: // DEC DE
+	case 0x2B: // DEC HL
+		R16( opcode, 4, 0x0B )--;
+		goto loop;
+	
+	case 0x3B: // DEC SP
+		sp = uint16_t (sp - 1);
+		goto loop;
+	
+// AND
+	case 0xA6: // AND (HL)
+		data = READ( rp.hl );
+		goto and_data;
+	
+	case 0xE6: // AND imm
+		pc++;
+		goto and_data;
+	
+	CASE7( A0, A1, A2, A3, A4, A5, A7 ): // AND r
+		data = R8( opcode, 0xA0 );
+	and_data:
+		rg.a &= data;
+		flags = SZ28P( rg.a ) | H10;
+		goto loop;
+	
+// OR
+	case 0xB6: // OR (HL)
+		data = READ( rp.hl );
+		goto or_data;
+	
+	case 0xF6: // OR imm
+		pc++;
+		goto or_data;
+	
+	CASE7( B0, B1, B2, B3, B4, B5, B7 ): // OR r
+		data = R8( opcode, 0xB0 );
+	or_data:
+		rg.a |= data;
+		flags = SZ28P( rg.a );
+		goto loop;
+
+// XOR
+	case 0xAE: // XOR (HL)
+		data = READ( rp.hl );
+		goto xor_data;
+	
+	case 0xEE: // XOR imm
+		pc++;
+		goto xor_data;
+	
+	CASE7( A8, A9, AA, AB, AC, AD, AF ): // XOR r
+		data = R8( opcode, 0xA8 );
+	xor_data:
+		rg.a ^= data;
+		flags = SZ28P( rg.a );
+		goto loop;
+
+// LD
+	CASE7( 70, 71, 72, 73, 74, 75, 77 ): // LD (HL),r
+		WRITE( rp.hl, R8( opcode, 0x70 ) );
+		goto loop;
+	
+	CASE6( 41, 42, 43, 44, 45, 47 ): // LD B,r
+	CASE6( 48, 4A, 4B, 4C, 4D, 4F ): // LD C,r
+	CASE6( 50, 51, 53, 54, 55, 57 ): // LD D,r
+	CASE6( 58, 59, 5A, 5C, 5D, 5F ): // LD E,r
+	CASE6( 60, 61, 62, 63, 65, 67 ): // LD H,r
+	CASE6( 68, 69, 6A, 6B, 6C, 6F ): // LD L,r
+	CASE6( 78, 79, 7A, 7B, 7C, 7D ): // LD A,r
+		R8( opcode >> 3 & 7, 0 ) = R8( opcode & 7, 0 );
+		goto loop;
+	
+	CASE5( 06, 0E, 16, 1E, 26 ): // LD r,imm
+		R8( opcode >> 3, 0 ) = data;
+		pc++;
+		goto loop;
+	
+	case 0x36: // LD (HL),imm
+		pc++;
+		WRITE( rp.hl, data );
+		goto loop;
+	
+	CASE7( 46, 4E, 56, 5E, 66, 6E, 7E ): // LD r,(HL)
+		R8( opcode >> 3, 8 ) = READ( rp.hl );
+		goto loop;
+	
+	case 0x01: // LD rp,imm
+	case 0x11:
+	case 0x21:
+		R16( opcode, 4, 0x01 ) = GET_ADDR();
+		pc += 2;
+		goto loop;
+	
+	case 0x31: // LD sp,imm
+		sp = GET_ADDR();
+		pc += 2;
+		goto loop;
+	
+	case 0x2A:{// LD HL,(addr)
+		uint_fast16_t addr = GET_ADDR();
+		pc += 2;
+		rp.hl = READ_WORD( addr );
+		goto loop;
+	}
+	
+	case 0x32:{// LD (addr),A
+		uint_fast16_t addr = GET_ADDR();
+		pc += 2;
+		WRITE( addr, rg.a );
+		goto loop;
+	}
+	
+	case 0x22:{// LD (addr),HL
+		uint_fast16_t addr = GET_ADDR();
+		pc += 2;
+		WRITE_WORD( addr, rp.hl );
+		goto loop;
+	}
+	
+	case 0x02: // LD (BC),A
+	case 0x12: // LD (DE),A
+		WRITE( R16( opcode, 4, 0x02 ), rg.a );
+		goto loop;
+	
+	case 0x0A: // LD A,(BC)
+	case 0x1A: // LD A,(DE)
+		rg.a = READ( R16( opcode, 4, 0x0A ) );
+		goto loop;
+	
+	case 0xF9: // LD SP,HL
+		sp = rp.hl;
+		goto loop;
+	
+// Rotate
+	
+	case 0x07:{// RLCA
+		uint_fast16_t temp = rg.a;
+		temp = (temp << 1) | (temp >> 7);
+		flags = (flags & (S80 | Z40 | P04)) |
+				(temp & (F20 | F08 | C01));
+		rg.a = temp;
+		goto loop;
+	}
+	
+	case 0x0F:{// RRCA
+		uint_fast16_t temp = rg.a;
+		flags = (flags & (S80 | Z40 | P04)) |
+				(temp & C01);
+		temp = (temp << 7) | (temp >> 1);
+		flags |= temp & (F20 | F08);
+		rg.a = temp;
+		goto loop;
+	}
+	
+	case 0x17:{// RLA
+		blargg_ulong temp = (rg.a << 1) | (flags & C01);
+		flags = (flags & (S80 | Z40 | P04)) |
+				(temp & (F20 | F08)) |
+				(temp >> 8);
+		rg.a = (uint8_t)temp;
+		goto loop;
+	}
+	
+	case 0x1F:{// RRA
+		uint_fast16_t temp = (flags << 7) | (rg.a >> 1);
+		flags = (flags & (S80 | Z40 | P04)) |
+				(temp & (F20 | F08)) |
+				(rg.a & C01);
+		rg.a = temp;
+		goto loop;
+	}
+	
+// Misc
+	case 0x2F:{// CPL
+		uint_fast16_t temp = ~rg.a;
+		flags = (flags & (S80 | Z40 | P04 | C01)) |
+				(temp & (F20 | F08)) |
+				(H10 | N02);
+		rg.a = temp;
+		goto loop;
+	}
+	
+	case 0x3F:{// CCF
+		flags = ((flags & (S80 | Z40 | P04 | C01)) ^ C01) |
+				(flags << 4 & H10) |
+				(rg.a & (F20 | F08));
+		goto loop;
+	}
+	
+	case 0x37: // SCF
+		flags = (flags & (S80 | Z40 | P04)) | C01 |
+				(rg.a & (F20 | F08));
+		goto loop;
+	
+	case 0xDB: // IN A,(imm)
+		pc++;
+		rg.a = IN( data + rg.a * 0x100 );
+		goto loop;
+
+	case 0xE3:{// EX (SP),HL
+		uint_fast16_t temp = READ_WORD( sp );
+		WRITE_WORD( sp, rp.hl );
+		rp.hl = temp;
+		goto loop;
+	}
+	
+	case 0xEB:{// EX DE,HL
+		uint_fast16_t temp = rp.hl;
+		rp.hl = rp.de;
+		rp.de = temp;
+		goto loop;
+	}
+	
+	case 0xD9:{// EXX DE,HL
+		uint_fast16_t temp = r.alt.w.bc;
+		r.alt.w.bc = rp.bc;
+		rp.bc = temp;
+		
+		temp = r.alt.w.de;
+		r.alt.w.de = rp.de;
+		rp.de = temp;
+		
+		temp = r.alt.w.hl;
+		r.alt.w.hl = rp.hl;
+		rp.hl = temp;
+		goto loop;
+	}
+	
+	case 0xF3: // DI
+		r.iff1 = 0;
+		r.iff2 = 0;
+		goto loop;
+	
+	case 0xFB: // EI
+		r.iff1 = 1;
+		r.iff2 = 1;
+		// TODO: delayed effect
+		goto loop;
+	
+	case 0x76: // HALT
+		goto halt;
+	
+//////////////////////////////////////// CB prefix
+	{
+	case 0xCB:
+		unsigned data2;
+		data2 = instr [1];
+		(void) data2; // TODO is this the same as data in all cases?
+		pc++;
+		switch ( data )
+		{
+	
+	// Rotate left
+		
+	#define RLC( read, write ) {\
+		uint_fast8_t result = read;\
+		result = uint8_t (result << 1) | (result >> 7);\
+		flags = SZ28P( result ) | (result & C01);\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x06: // RLC (HL)
+			s_time += 7;
+			data = rp.hl;
+		rlc_data_addr:
+			RLC( READ( data ), WRITE( data, result ) )
+		
+		CASE7( 00, 01, 02, 03, 04, 05, 07 ):{// RLC r
+			uint8_t& reg = R8( data, 0 );
+			RLC( reg, reg = result )
+		}
+		
+	#define RL( read, write ) {\
+		uint_fast16_t result = (read << 1) | (flags & C01);\
+		flags = SZ28PC( result );\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x16: // RL (HL)
+			s_time += 7;
+			data = rp.hl;
+		rl_data_addr:
+			RL( READ( data ), WRITE( data, result ) )
+		
+		CASE7( 10, 11, 12, 13, 14, 15, 17 ):{// RL r
+			uint8_t& reg = R8( data, 0x10 );
+			RL( reg, reg = result )
+		}
+		
+	#define SLA( read, add, write ) {\
+		uint_fast16_t result = (read << 1) | add;\
+		flags = SZ28PC( result );\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x26: // SLA (HL)
+			s_time += 7;
+			data = rp.hl;
+		sla_data_addr:
+			SLA( READ( data ), 0, WRITE( data, result ) )
+		
+		CASE7( 20, 21, 22, 23, 24, 25, 27 ):{// SLA r
+			uint8_t& reg = R8( data, 0x20 );
+			SLA( reg, 0, reg = result )
+		}
+		
+		case 0x36: // SLL (HL)
+			s_time += 7;
+			data = rp.hl;
+		sll_data_addr:
+			SLA( READ( data ), 1, WRITE( data, result ) )
+		
+		CASE7( 30, 31, 32, 33, 34, 35, 37 ):{// SLL r
+			uint8_t& reg = R8( data, 0x30 );
+			SLA( reg, 1, reg = result )
+		}
+		
+	// Rotate right
+		
+	#define RRC( read, write ) {\
+		uint_fast8_t result = read;\
+		flags = result & C01;\
+		result = uint8_t (result << 7) | (result >> 1);\
+		flags |= SZ28P( result );\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x0E: // RRC (HL)
+			s_time += 7;
+			data = rp.hl;
+		rrc_data_addr:
+			RRC( READ( data ), WRITE( data, result ) )
+		
+		CASE7( 08, 09, 0A, 0B, 0C, 0D, 0F ):{// RRC r
+			uint8_t& reg = R8( data, 0x08 );
+			RRC( reg, reg = result )
+		}
+		
+	#define RR( read, write ) {\
+		uint_fast8_t result = read;\
+		uint_fast8_t temp = result & C01;\
+		result = uint8_t (flags << 7) | (result >> 1);\
+		flags = SZ28P( result ) | temp;\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x1E: // RR (HL)
+			s_time += 7;
+			data = rp.hl;
+		rr_data_addr:
+			RR( READ( data ), WRITE( data, result ) )
+		
+		CASE7( 18, 19, 1A, 1B, 1C, 1D, 1F ):{// RR r
+			uint8_t& reg = R8( data, 0x18 );
+			RR( reg, reg = result )
+		}
+		
+	#define SRA( read, write ) {\
+		uint_fast8_t result = read;\
+		flags = result & C01;\
+		result = (result & 0x80) | (result >> 1);\
+		flags |= SZ28P( result );\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x2E: // SRA (HL)
+			data = rp.hl;
+			s_time += 7;
+		sra_data_addr:
+			SRA( READ( data ), WRITE( data, result ) )
+		
+		CASE7( 28, 29, 2A, 2B, 2C, 2D, 2F ):{// SRA r
+			uint8_t& reg = R8( data, 0x28 );
+			SRA( reg, reg = result )
+		}
+		
+	#define SRL( read, write ) {\
+		uint_fast8_t result = read;\
+		flags = result & C01;\
+		result >>= 1;\
+		flags |= SZ28P( result );\
+		write;\
+		goto loop;\
+	}
+		
+		case 0x3E: // SRL (HL)
+			s_time += 7;
+			data = rp.hl;
+		srl_data_addr:
+			SRL( READ( data ), WRITE( data, result ) )
+		
+		CASE7( 38, 39, 3A, 3B, 3C, 3D, 3F ):{// SRL r
+			uint8_t& reg = R8( data, 0x38 );
+			SRL( reg, reg = result )
+		}
+		
+	// BIT
+		{
+			unsigned temp;
+		CASE8( 46, 4E, 56, 5E, 66, 6E, 76, 7E ): // BIT b,(HL)
+			s_time += 4;
+			temp = READ( rp.hl );
+			flags &= C01;
+			goto bit_temp;
+		CASE7( 40, 41, 42, 43, 44, 45, 47 ): // BIT 0,r
+		CASE7( 48, 49, 4A, 4B, 4C, 4D, 4F ): // BIT 1,r
+		CASE7( 50, 51, 52, 53, 54, 55, 57 ): // BIT 2,r
+		CASE7( 58, 59, 5A, 5B, 5C, 5D, 5F ): // BIT 3,r
+		CASE7( 60, 61, 62, 63, 64, 65, 67 ): // BIT 4,r
+		CASE7( 68, 69, 6A, 6B, 6C, 6D, 6F ): // BIT 5,r
+		CASE7( 70, 71, 72, 73, 74, 75, 77 ): // BIT 6,r
+		CASE7( 78, 79, 7A, 7B, 7C, 7D, 7F ): // BIT 7,r
+			temp = R8( data & 7, 0 );
+			flags = (flags & C01) | (temp & (F20 | F08));
+		bit_temp:
+			int masked = temp & 1 << (data >> 3 & 7);
+			flags |=(masked & S80) | H10 |
+					((masked - 1) >> 8 & (Z40 | P04));
+			goto loop;
+		}
+		
+	// SET/RES
+		CASE8( 86, 8E, 96, 9E, A6, AE, B6, BE ): // RES b,(HL)
+		CASE8( C6, CE, D6, DE, E6, EE, F6, FE ):{// SET b,(HL)
+			s_time += 7;
+			int temp = READ( rp.hl );
+			int bit = 1 << (data >> 3 & 7);
+			temp |= bit; // SET
+			if ( !(data & 0x40) )
+				temp ^= bit; // RES
+			WRITE( rp.hl, temp );
+			goto loop;
+		}
+		
+		CASE7( C0, C1, C2, C3, C4, C5, C7 ): // SET 0,r
+		CASE7( C8, C9, CA, CB, CC, CD, CF ): // SET 1,r
+		CASE7( D0, D1, D2, D3, D4, D5, D7 ): // SET 2,r
+		CASE7( D8, D9, DA, DB, DC, DD, DF ): // SET 3,r
+		CASE7( E0, E1, E2, E3, E4, E5, E7 ): // SET 4,r
+		CASE7( E8, E9, EA, EB, EC, ED, EF ): // SET 5,r
+		CASE7( F0, F1, F2, F3, F4, F5, F7 ): // SET 6,r
+		CASE7( F8, F9, FA, FB, FC, FD, FF ): // SET 7,r
+			R8( data & 7, 0 ) |= 1 << (data >> 3 & 7);
+			goto loop;
+		
+		CASE7( 80, 81, 82, 83, 84, 85, 87 ): // RES 0,r
+		CASE7( 88, 89, 8A, 8B, 8C, 8D, 8F ): // RES 1,r
+		CASE7( 90, 91, 92, 93, 94, 95, 97 ): // RES 2,r
+		CASE7( 98, 99, 9A, 9B, 9C, 9D, 9F ): // RES 3,r
+		CASE7( A0, A1, A2, A3, A4, A5, A7 ): // RES 4,r
+		CASE7( A8, A9, AA, AB, AC, AD, AF ): // RES 5,r
+		CASE7( B0, B1, B2, B3, B4, B5, B7 ): // RES 6,r
+		CASE7( B8, B9, BA, BB, BC, BD, BF ): // RES 7,r
+			R8( data & 7, 0 ) &= ~(1 << (data >> 3 & 7));
+			goto loop;
+		}
+		assert( false );
+	}
+
+#undef GET_ADDR
+#define GET_ADDR()  GET_LE16( instr + 1 )
+
+//////////////////////////////////////// ED prefix
+	{
+	case 0xED:
+		pc++;
+		s_time += ed_dd_timing [data] >> 4;
+		switch ( data )
+		{
+		{
+			blargg_ulong temp;
+		case 0x72: // SBC HL,SP
+		case 0x7A: // ADC HL,SP
+			temp = sp;
+			if ( 0 )
+		case 0x42: // SBC HL,BC
+		case 0x52: // SBC HL,DE
+		case 0x62: // SBC HL,HL
+		case 0x4A: // ADC HL,BC
+		case 0x5A: // ADC HL,DE
+		case 0x6A: // ADC HL,HL
+				temp = R16( data >> 3 & 6, 1, 0 );
+			blargg_ulong sum = temp + (flags & C01);
+			flags = ~data >> 2 & N02;
+			if ( flags )
+				sum = -sum;
+			sum += rp.hl;
+			temp ^= rp.hl;
+			temp ^= sum;
+			flags |=(sum >> 16 & C01) |
+					(temp >> 8 & H10) |
+					(sum >> 8 & (S80 | F20 | F08)) |
+					((temp - -0x8000) >> 14 & V04);
+			rp.hl = sum;
+			if ( (uint16_t) sum )
+				goto loop;
+			flags |= Z40;
+			goto loop;
+		}
+		
+		CASE8( 40, 48, 50, 58, 60, 68, 70, 78 ):{// IN r,(C)
+			int temp = IN( rp.bc );
+			R8( data >> 3, 8 ) = temp;
+			flags = (flags & C01) | SZ28P( temp );
+			goto loop;
+		}
+		
+		case 0x71: // OUT (C),0
+			rg.flags = 0;
+		CASE7( 41, 49, 51, 59, 61, 69, 79 ): // OUT (C),r
+			OUT( rp.bc, R8( data >> 3, 8 ) );
+			goto loop;
+		
+		{
+			unsigned temp;
+		case 0x73: // LD (ADDR),SP
+			temp = sp;
+			if ( 0 )
+		case 0x43: // LD (ADDR),BC
+		case 0x53: // LD (ADDR),DE
+				temp = R16( data, 4, 0x43 );
+			uint_fast16_t addr = GET_ADDR();
+			pc += 2;
+			WRITE_WORD( addr, temp );
+			goto loop;
+		}
+		
+		case 0x4B: // LD BC,(ADDR)
+		case 0x5B:{// LD DE,(ADDR)
+			uint_fast16_t addr = GET_ADDR();
+			pc += 2;
+			R16( data, 4, 0x4B ) = READ_WORD( addr );
+			goto loop;
+		}
+		
+		case 0x7B:{// LD SP,(ADDR)
+			uint_fast16_t addr = GET_ADDR();
+			pc += 2;
+			sp = READ_WORD( addr );
+			goto loop;
+		}
+		
+		case 0x67:{// RRD
+			uint_fast8_t temp = READ( rp.hl );
+			WRITE( rp.hl, (rg.a << 4) | (temp >> 4) );
+			temp = (rg.a & 0xF0) | (temp & 0x0F);
+			flags = (flags & C01) | SZ28P( temp );
+			rg.a = temp;
+			goto loop;
+		}
+		
+		case 0x6F:{// RLD
+			uint_fast8_t temp = READ( rp.hl );
+			WRITE( rp.hl, (temp << 4) | (rg.a & 0x0F) );
+			temp = (rg.a & 0xF0) | (temp >> 4);
+			flags = (flags & C01) | SZ28P( temp );
+			rg.a = temp;
+			goto loop;
+		}
+		
+		CASE8( 44, 4C, 54, 5C, 64, 6C, 74, 7C ): // NEG
+			opcode = 0x10; // flag to do SBC instead of ADC
+			flags &= ~C01;
+			data = rg.a;
+			rg.a = 0;
+			goto adc_data;
+		
+		{
+			int inc;
+		case 0xA9: // CPD
+		case 0xB9: // CPDR
+			inc = -1;
+			if ( 0 )
+		case 0xA1: // CPI
+		case 0xB1: // CPIR
+				inc = +1;
+			uint_fast16_t addr = rp.hl;
+			rp.hl = addr + inc;
+			int temp = READ( addr );
+			
+			int result = rg.a - temp;
+			flags = (flags & C01) | N02 |
+					((((temp ^ rg.a) & H10) ^ result) & (S80 | H10));
+			
+			if ( !(uint8_t) result ) flags |= Z40;
+			result -= (flags & H10) >> 4;
+			flags |= result & F08;
+			flags |= result << 4 & F20;
+			if ( !--rp.bc )
+				goto loop;
+			
+			flags |= V04;
+			if ( flags & Z40 || data < 0xB0 )
+				goto loop;
+			
+			pc -= 2;
+			s_time += 5;
+			goto loop;
+		}
+		
+		{
+			int inc;
+		case 0xA8: // LDD
+		case 0xB8: // LDDR
+			inc = -1;
+			if ( 0 )
+		case 0xA0: // LDI
+		case 0xB0: // LDIR
+				inc = +1;
+			uint_fast16_t addr = rp.hl;
+			rp.hl = addr + inc;
+			int temp = READ( addr );
+			
+			addr = rp.de;
+			rp.de = addr + inc;
+			WRITE( addr, temp );
+			
+			temp += rg.a;
+			flags = (flags & (S80 | Z40 | C01)) |
+					(temp & F08) | (temp << 4 & F20);
+			if ( !--rp.bc )
+				goto loop;
+			
+			flags |= V04;
+			if ( data < 0xB0 )
+				goto loop;
+			
+			pc -= 2;
+			s_time += 5;
+			goto loop;
+		}
+		
+		{
+			int inc;
+		case 0xAB: // OUTD
+		case 0xBB: // OTDR
+			inc = -1;
+			if ( 0 )
+		case 0xA3: // OUTI
+		case 0xB3: // OTIR
+				inc = +1;
+			uint_fast16_t addr = rp.hl;
+			rp.hl = addr + inc;
+			int temp = READ( addr );
+			
+			int b = --rg.b;
+			flags = (temp >> 6 & N02) | SZ28( b );
+			if ( b && data >= 0xB0 )
+			{
+				pc -= 2;
+				s_time += 5;
+			}
+			
+			OUT( rp.bc, temp );
+			goto loop;
+		}
+		
+		{
+			int inc;
+		case 0xAA: // IND
+		case 0xBA: // INDR
+			inc = -1;
+			if ( 0 )
+		case 0xA2: // INI
+		case 0xB2: // INIR
+				inc = +1;
+			
+			uint_fast16_t addr = rp.hl;
+			rp.hl = addr + inc;
+			
+			int temp = IN( rp.bc );
+			
+			int b = --rg.b;
+			flags = (temp >> 6 & N02) | SZ28( b );
+			if ( b && data >= 0xB0 )
+			{
+				pc -= 2;
+				s_time += 5;
+			}
+			
+			WRITE( addr, temp );
+			goto loop;
+		}
+		
+		case 0x47: // LD I,A
+			r.i = rg.a;
+			goto loop;
+		
+		case 0x4F: // LD R,A
+			SET_R( rg.a );
+			debug_printf( "LD R,A not supported\n" );
+			warning = true;
+			goto loop;
+		
+		case 0x57: // LD A,I
+			rg.a = r.i;
+			goto ld_ai_common;
+		
+		case 0x5F: // LD A,R
+			rg.a = GET_R();
+			debug_printf( "LD A,R not supported\n" );
+			warning = true;
+		ld_ai_common:
+			flags = (flags & C01) | SZ28( rg.a ) | (r.iff2 << 2 & V04);
+			goto loop;
+		
+		CASE8( 45, 4D, 55, 5D, 65, 6D, 75, 7D ): // RETI/RETN
+			r.iff1 = r.iff2;
+			goto ret_taken;
+		
+		case 0x46: case 0x4E: case 0x66: case 0x6E: // IM 0
+			r.im = 0;
+			goto loop;
+		
+		case 0x56: case 0x76: // IM 1
+			r.im = 1;
+			goto loop;
+		
+		case 0x5E: case 0x7E: // IM 2
+			r.im = 2;
+			goto loop;
+		
+		default:
+			debug_printf( "Opcode $ED $%02X not supported\n", data );
+			warning = true;
+			goto loop;
+		}
+		assert( false );
+	}
+
+//////////////////////////////////////// DD/FD prefix
+	{
+	uint_fast16_t ixy;
+	case 0xDD:
+		ixy = ix;
+		goto ix_prefix;
+	case 0xFD:
+		ixy = iy;
+	ix_prefix:
+		pc++;
+		unsigned data2 = READ_PROG( pc );
+		s_time += ed_dd_timing [data] & 0x0F;
+		switch ( data )
+		{
+	// TODO: more efficient way of avoid negative address
+	// TODO: avoid using this as argument to READ() since it is evaluated twice
+	#define IXY_DISP( ixy, disp )   uint16_t ((ixy) + (disp))
+	
+	#define SET_IXY( in ) if ( opcode == 0xDD ) ix = in; else iy = in;
+	
+	// ADD/ADC/SUB/SBC
+	
+		case 0x96: // SUB (IXY+disp)
+		case 0x86: // ADD (IXY+disp)
+			flags &= ~C01;
+		case 0x9E: // SBC (IXY+disp)
+		case 0x8E: // ADC (IXY+disp)
+			pc++;
+			opcode = data;
+			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
+			goto adc_data;
+		
+		case 0x94: // SUB HXY
+		case 0x84: // ADD HXY
+			flags &= ~C01;
+		case 0x9C: // SBC HXY
+		case 0x8C: // ADC HXY
+			opcode = data;
+			data = ixy >> 8;
+			goto adc_data;
+		
+		case 0x95: // SUB LXY
+		case 0x85: // ADD LXY
+			flags &= ~C01;
+		case 0x9D: // SBC LXY
+		case 0x8D: // ADC LXY
+			opcode = data;
+			data = (uint8_t) ixy;
+			goto adc_data;
+		
+		{
+			unsigned temp;
+		case 0x39: // ADD IXY,SP
+			temp = sp;
+			goto add_ixy_data;
+		
+		case 0x29: // ADD IXY,HL
+			temp = ixy;
+			goto add_ixy_data;
+		
+		case 0x09: // ADD IXY,BC
+		case 0x19: // ADD IXY,DE
+			temp = R16( data, 4, 0x09 );
+		add_ixy_data: {
+			blargg_ulong sum = ixy + temp;
+			temp ^= ixy;
+			ixy = (uint16_t) sum;
+			flags = (flags & (S80 | Z40 | V04)) |
+					(sum >> 16) |
+					(sum >> 8 & (F20 | F08)) |
+					((temp ^ sum) >> 8 & H10);
+			goto set_ixy;
+		}
+		}
+	
+	// AND
+		case 0xA6: // AND (IXY+disp)
+			pc++;
+			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
+			goto and_data;
+		
+		case 0xA4: // AND HXY
+			data = ixy >> 8;
+			goto and_data;
+		
+		case 0xA5: // AND LXY
+			data = (uint8_t) ixy;
+			goto and_data;
+	
+	// OR
+		case 0xB6: // OR (IXY+disp)
+			pc++;
+			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
+			goto or_data;
+		
+		case 0xB4: // OR HXY
+			data = ixy >> 8;
+			goto or_data;
+		
+		case 0xB5: // OR LXY
+			data = (uint8_t) ixy;
+			goto or_data;
+	
+	// XOR
+		case 0xAE: // XOR (IXY+disp)
+			pc++;
+			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
+			goto xor_data;
+		
+		case 0xAC: // XOR HXY
+			data = ixy >> 8;
+			goto xor_data;
+		
+		case 0xAD: // XOR LXY
+			data = (uint8_t) ixy;
+			goto xor_data;
+	
+	// CP
+		case 0xBE: // CP (IXY+disp)
+			pc++;
+			data = READ( IXY_DISP( ixy, (int8_t) data2 )  );
+			goto cp_data;
+		
+		case 0xBC: // CP HXY
+			data = ixy >> 8;
+			goto cp_data;
+		
+		case 0xBD: // CP LXY
+			data = (uint8_t) ixy;
+			goto cp_data;
+		
+	// LD
+		CASE7( 70, 71, 72, 73, 74, 75, 77 ): // LD (IXY+disp),r
+			data = R8( data, 0x70 );
+			if ( 0 )
+		case 0x36: // LD (IXY+disp),imm
+				pc++, data = READ_PROG( pc );
+			pc++;
+			WRITE( IXY_DISP( ixy, (int8_t) data2 ), data );
+			goto loop;
+
+		CASE5( 44, 4C, 54, 5C, 7C ): // LD r,HXY
+			R8( data >> 3, 8 ) = ixy >> 8;
+			goto loop;
+		
+		case 0x64: // LD HXY,HXY
+		case 0x6D: // LD LXY,LXY
+			goto loop;
+		
+		CASE5( 45, 4D, 55, 5D, 7D ): // LD r,LXY
+			R8( data >> 3, 8 ) = ixy;
+			goto loop;
+		
+		CASE7( 46, 4E, 56, 5E, 66, 6E, 7E ): // LD r,(IXY+disp)
+			pc++;
+			R8( data >> 3, 8 ) = READ( IXY_DISP( ixy, (int8_t) data2 ) );
+			goto loop;
+		
+		case 0x26: // LD HXY,imm
+			pc++;
+			goto ld_hxy_data;
+			
+		case 0x65: // LD HXY,LXY
+			data2 = (uint8_t) ixy;
+			goto ld_hxy_data;
+		
+		CASE5( 60, 61, 62, 63, 67 ): // LD HXY,r
+			data2 = R8( data, 0x60 );
+		ld_hxy_data:
+			ixy = (uint8_t) ixy | (data2 << 8);
+			goto set_ixy;
+		
+		case 0x2E: // LD LXY,imm
+			pc++;
+			goto ld_lxy_data;
+			
+		case 0x6C: // LD LXY,HXY
+			data2 = ixy >> 8;
+			goto ld_lxy_data;
+		
+		CASE5( 68, 69, 6A, 6B, 6F ): // LD LXY,r
+			data2 = R8( data, 0x68 );
+		ld_lxy_data:
+			ixy = (ixy & 0xFF00) | data2;
+		set_ixy:
+			if ( opcode == 0xDD )
+			{
+				ix = ixy;
+				goto loop;
+			}
+			iy = ixy;
+			goto loop;
+
+		case 0xF9: // LD SP,IXY
+			sp = ixy;
+			goto loop;
+	
+		case 0x22:{// LD (ADDR),IXY
+			uint_fast16_t addr = GET_ADDR();
+			pc += 2;
+			WRITE_WORD( addr, ixy );
+			goto loop;
+		}
+		
+		case 0x21: // LD IXY,imm
+			ixy = GET_ADDR();
+			pc += 2;
+			goto set_ixy;
+		
+		case 0x2A:{// LD IXY,(addr)
+			uint_fast16_t addr = GET_ADDR();
+			ixy = READ_WORD( addr );
+			pc += 2;
+			goto set_ixy;
+		}
+		
+	// DD/FD CB prefix
+		case 0xCB: {
+			data = IXY_DISP( ixy, (int8_t) data2 );
+			pc++;
+			data2 = READ_PROG( pc );
+			pc++;
+			switch ( data2 )
+			{
+			case 0x06: goto rlc_data_addr; // RLC (IXY)
+			case 0x16: goto rl_data_addr;  // RL (IXY)
+			case 0x26: goto sla_data_addr; // SLA (IXY)
+			case 0x36: goto sll_data_addr; // SLL (IXY)
+			case 0x0E: goto rrc_data_addr; // RRC (IXY)
+			case 0x1E: goto rr_data_addr;  // RR (IXY)
+			case 0x2E: goto sra_data_addr; // SRA (IXY)
+			case 0x3E: goto srl_data_addr; // SRL (IXY)
+			
+			CASE8( 46, 4E, 56, 5E, 66, 6E, 76, 7E ):{// BIT b,(IXY+disp)
+				uint_fast8_t temp = READ( data );
+				int masked = temp & 1 << (data2 >> 3 & 7);
+				flags = (flags & C01) | H10 |
+						(masked & S80) |
+						((masked - 1) >> 8 & (Z40 | P04));
+				goto loop;
+			}
+			
+			CASE8( 86, 8E, 96, 9E, A6, AE, B6, BE ): // RES b,(IXY+disp)
+			CASE8( C6, CE, D6, DE, E6, EE, F6, FE ):{// SET b,(IXY+disp)
+				int temp = READ( data );
+				int bit = 1 << (data2 >> 3 & 7);
+				temp |= bit; // SET
+				if ( !(data2 & 0x40) )
+					temp ^= bit; // RES
+				WRITE( data, temp );
+				goto loop;
+			}
+			
+			default:
+				debug_printf( "Opcode $%02X $CB $%02X not supported\n", opcode, data2 );
+				warning = true;
+				goto loop;
+			}
+			assert( false );
+		}
+		
+	// INC/DEC
+		case 0x23: // INC IXY
+			ixy = uint16_t (ixy + 1);
+			goto set_ixy;
+		
+		case 0x2B: // DEC IXY
+			ixy = uint16_t (ixy - 1);
+			goto set_ixy;
+		
+		case 0x34: // INC (IXY+disp)
+			ixy = IXY_DISP( ixy, (int8_t) data2 );
+			pc++;
+			data = READ( ixy ) + 1;
+			WRITE( ixy, data );
+			goto inc_set_flags;
+		
+		case 0x35: // DEC (IXY+disp)
+			ixy = IXY_DISP( ixy, (int8_t) data2 );
+			pc++;
+			data = READ( ixy ) - 1;
+			WRITE( ixy, data );
+			goto dec_set_flags;
+		
+		case 0x24: // INC HXY
+			ixy = uint16_t (ixy + 0x100);
+			data = ixy >> 8;
+			goto inc_xy_common;
+		
+		case 0x2C: // INC LXY
+			data = uint8_t (ixy + 1);
+			ixy = (ixy & 0xFF00) | data;
+		inc_xy_common:
+			if ( opcode == 0xDD )
+			{
+				ix = ixy;
+				goto inc_set_flags;
+			}
+			iy = ixy;
+			goto inc_set_flags;
+		
+		case 0x25: // DEC HXY
+			ixy = uint16_t (ixy - 0x100);
+			data = ixy >> 8;
+			goto dec_xy_common;
+		
+		case 0x2D: // DEC LXY
+			data = uint8_t (ixy - 1);
+			ixy = (ixy & 0xFF00) | data;
+		dec_xy_common:
+			if ( opcode == 0xDD )
+			{
+				ix = ixy;
+				goto dec_set_flags;
+			}
+			iy = ixy;
+			goto dec_set_flags;
+		
+	// PUSH/POP
+		case 0xE5: // PUSH IXY
+			data = ixy;
+			goto push_data;
+		
+		case 0xE1:{// POP IXY
+			ixy = READ_WORD( sp );
+			sp = uint16_t (sp + 2);
+			goto set_ixy;
+		}
+	
+	// Misc
+		
+		case 0xE9: // JP (IXY)
+			pc = ixy;
+			goto loop;
+		
+		case 0xE3:{// EX (SP),IXY
+			uint_fast16_t temp = READ_WORD( sp );
+			WRITE_WORD( sp, ixy );
+			ixy = temp;
+			goto set_ixy;
+		}
+		
+		default:
+			debug_printf( "Unnecessary DD/FD prefix encountered\n" );
+			warning = true;
+			pc--;
+			goto loop;
+		}
+		assert( false );
+	}
+	
+	}
+	debug_printf( "Unhandled main opcode: $%02X\n", opcode );
+	assert( false );
+	
+hit_idle_addr:
+	s_time -= 11;
+	goto out_of_time;
+halt:
+	s_time &= 3; // increment by multiple of 4
+out_of_time:
+	pc--;
+	
+	s.time = s_time;
+	rg.flags = flags;
+	r.ix    = ix;
+	r.iy    = iy;
+	r.sp    = sp;
+	r.pc    = pc;
+	this->r.b = rg;
+	this->state_ = s;
+	this->state = &this->state_;
+	
+	return warning;
+}
diff --git a/libraries/game-music-emu/gme/Kss_Cpu.h b/libraries/game-music-emu/gme/Kss_Cpu.h
new file mode 100644
index 000000000..d31864cd3
--- /dev/null
+++ b/libraries/game-music-emu/gme/Kss_Cpu.h
@@ -0,0 +1,120 @@
+// Z80 CPU emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef KSS_CPU_H
+#define KSS_CPU_H
+
+#include "blargg_endian.h"
+
+typedef blargg_long cpu_time_t; // time type used by Kss_Cpu::run(), time() and set_time()
+
+// must be defined by caller (this header only declares them)
+void kss_cpu_out( class Kss_Cpu*, cpu_time_t, unsigned addr, int data ); // NOTE(review): presumably the port-output hook behind OUT() — confirm
+int  kss_cpu_in( class Kss_Cpu*, cpu_time_t, unsigned addr ); // NOTE(review): presumably the port-input hook behind IN() — confirm
+void kss_cpu_write( class Kss_Cpu*, unsigned addr, int data ); // NOTE(review): presumably the memory-write hook behind WRITE() — confirm
+
+class Kss_Cpu { // Z80 CPU emulator core with a page-based memory map
+public:
+	// Clear registers and map all pages to unmapped
+	void reset( void* unmapped_write, void const* unmapped_read );
+	
+	// Map memory. Start and size must be multiple of page_size.
+	enum { page_size = 0x2000 };
+	void map_mem( unsigned addr, blargg_ulong size, void* write, void const* read );
+	
+	// Map address to a pointer inside the page currently mapped at that address
+	uint8_t* write( unsigned addr );
+	uint8_t const* read( unsigned addr );
+	
+	// Run until specified time is reached. Returns true if suspicious/unsupported
+	// instruction was encountered at any point during run.
+	bool run( cpu_time_t end_time );
+	
+	// Time of beginning of next instruction (stored relative to state->base)
+	cpu_time_t time() const             { return state->time + state->base; }
+	
+	// Alter current time. Not supported during run() call.
+	void set_time( cpu_time_t t )       { state->time = t - state->base; }
+	void adjust_time( int delta )       { state->time += delta; }
+	
+	#if BLARGG_BIG_ENDIAN // byte order of each register pair follows host endianness...
+		struct regs_t { uint8_t b, c, d, e, h, l, flags, a; };
+	#else // ...so that, overlaid with pairs_t, w.bc == (b << 8) | c on either host
+		struct regs_t { uint8_t c, b, e, d, l, h, a, flags; };
+	#endif
+	BOOST_STATIC_ASSERT( sizeof (regs_t) == 8 ); // eight one-byte registers, no padding
+	
+	struct pairs_t { uint16_t bc, de, hl, fa; }; // 16-bit view of the same eight bytes
+	
+	// Registers are not updated until run() returns
+	struct registers_t {
+		uint16_t pc;
+		uint16_t sp;
+		uint16_t ix;
+		uint16_t iy;
+		union { // main register set, addressable as bytes or as pairs
+			regs_t b; //  b.b, b.c, b.d, b.e, b.h, b.l, b.flags, b.a
+			pairs_t w; // w.bc, w.de, w.hl, w.fa
+		};
+		union { // alternate register set (swapped in by EXX in run())
+			regs_t b;
+			pairs_t w;
+		} alt;
+		uint8_t iff1; // interrupt flip-flops: both cleared by DI and set by EI in run()
+		uint8_t iff2;
+		uint8_t r;
+		uint8_t i; // written by LD I,A and read by LD A,I in run()
+		uint8_t im; // interrupt mode 0, 1 or 2 as selected by the IM opcodes
+	};
+	//registers_t r; (below for efficiency)
+	
+	enum { idle_addr = 0xFFFF }; // NOTE(review): presumably the pc value that triggers run()'s hit_idle_addr exit — confirm
+	
+	// can read this far past end of a page
+	enum { cpu_padding = 0x100 };
+	
+public:
+	Kss_Cpu();
+	enum { page_shift = 13 }; // log2(page_size): 1 << 13 == 0x2000
+	enum { page_count = 0x10000 >> page_shift }; // pages needed to cover the 64K address space
+private:
+	uint8_t szpc [0x200]; // NOTE(review): presumably the flag lookup table behind the SZ28P()/SZ28PC() macros — confirm
+	cpu_time_t end_time_;
+	struct state_t {
+		uint8_t const* read  [page_count + 1]; // per-page read pointers (one spare entry past page_count)
+		uint8_t      * write [page_count + 1]; // per-page write pointers (one spare entry past page_count)
+		cpu_time_t base; // time() == time + base
+		cpu_time_t time;
+	};
+	state_t* state; // points to state_ or a local copy within run()
+	state_t state_;
+	void set_end_time( cpu_time_t t );
+	void set_page( int i, void* write, void const* read );
+public:
+	registers_t r;
+};
+
+#if BLARGG_NONPORTABLE
+	#define KSS_CPU_PAGE_OFFSET( addr ) (addr) // whole address used as the offset; NOTE(review): presumably page pointers are pre-biased to allow this — confirm in set_page()
+#else
+	#define KSS_CPU_PAGE_OFFSET( addr ) ((addr) & (page_size - 1)) // keep only the offset within the page
+#endif
+
+inline uint8_t* Kss_Cpu::write( unsigned addr ) // writable location for addr
+{
+	return &state->write [addr >> page_shift] [KSS_CPU_PAGE_OFFSET( addr )]; // page entry selected by the high bits, then offset
+}
+
+inline uint8_t const* Kss_Cpu::read( unsigned addr ) // readable location for addr
+{
+	return &state->read [addr >> page_shift] [KSS_CPU_PAGE_OFFSET( addr )]; // page entry selected by the high bits, then offset
+}
+
+inline void Kss_Cpu::set_end_time( cpu_time_t t ) // re-base the clock on t
+{
+	cpu_time_t const old_base = state->base;
+	state->base = t;
+	state->time += old_base - t; // compensate so time + base (what time() returns) is unchanged
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Kss_Emu.cpp b/libraries/game-music-emu/gme/Kss_Emu.cpp
new file mode 100644
index 000000000..fd4905ce3
--- /dev/null
+++ b/libraries/game-music-emu/gme/Kss_Emu.cpp
@@ -0,0 +1,416 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Kss_Emu.h"
+
+#include "blargg_endian.h"
+#include <string.h>
+
+/* Copyright (C) 2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+long const clock_rate = 3579545; // passed to setup_buffer(); set_tempo_() divides it by 50 or 60
+int const osc_count = Ay_Apu::osc_count + Scc_Apu::osc_count; // AY voices first, then SCC voices
+
+Kss_Emu::Kss_Emu()
+{
+	sn = 0; // the Sms_Apu is allocated later by load_() if the header asks for it
+	set_type( gme_kss_type );
+	set_silence_lookahead( 6 );
+	static const char* const names [osc_count] = { // three "Square" names, then five "Wave" names
+		"Square 1", "Square 2", "Square 3",
+		"Wave 1", "Wave 2", "Wave 3", "Wave 4", "Wave 5"
+	};
+	set_voice_names( names );
+	
+	static int const types [osc_count] = { // one entry per voice: wave_type OR'd with the voice index
+		wave_type | 0, wave_type | 1, wave_type | 2,
+		wave_type | 3, wave_type | 4, wave_type | 5, wave_type | 6, wave_type | 7
+	};
+	set_voice_types( types );
+	
+	memset( unmapped_read, 0xFF, sizeof unmapped_read ); // buffer later given to cpu::reset() for reads
+}
+
+Kss_Emu::~Kss_Emu() { unload(); } // unload() deletes sn and calls Classic_Emu::unload()
+
+void Kss_Emu::unload() // frees the optional Sms_Apu, then unloads the base class
+{
+	delete sn;
+	sn = 0; // cleared so a later unload() or load_() sees no chip
+	Classic_Emu::unload();
+}
+
+// Track info
+
+static void copy_kss_fields( Kss_Emu::header_t const& h, track_info_t* out )
+{
+	// device_flags 0x02 reports "Sega Master System"; with 0x04 also set, "Game Gear".
+	int const flags = h.device_flags;
+	const char* system = "MSX";
+	if ( (flags & 0x06) == 0x06 )
+		system = "Game Gear";
+	else if ( flags & 0x02 )
+		system = "Sega Master System";
+	Gme_File::copy_field_( out->system, system );
+}
+
+blargg_err_t Kss_Emu::track_info_( track_info_t* out, int ) const // track index is ignored
+{
+	copy_kss_fields( header_, out ); // only the system name is filled in
+	return 0;
+}
+
+static blargg_err_t check_kss_header( void const* header )
+{
+	// Only the two 4-byte tags "KSCC" and "KSSX" are accepted.
+	bool const known = !memcmp( header, "KSCC", 4 ) || !memcmp( header, "KSSX", 4 );
+	return known ? 0 : gme_wrong_file_type;
+}
+
+struct Kss_File : Gme_Info_ // header-only reader: stores just the basic KSS header
+{
+	Kss_Emu::header_t header_;
+	
+	Kss_File() { set_type( gme_kss_type ); }
+	
+	blargg_err_t load_( Data_Reader& in ) // reads header_size bytes, then validates the tag
+	{
+		blargg_err_t err = in.read( &header_, Kss_Emu::header_size );
+		if ( err )
+			return (err == in.eof_error ? gme_wrong_file_type : err); // EOF: too short for a header
+		return check_kss_header( &header_ );
+	}
+	
+	blargg_err_t track_info_( track_info_t* out, int ) const // track index is ignored
+	{
+		copy_kss_fields( header_, out );
+		return 0;
+	}
+};
+
+static Music_Emu* new_kss_emu () { return BLARGG_NEW Kss_Emu ; } // factory for the full emulator
+static Music_Emu* new_kss_file() { return BLARGG_NEW Kss_File; } // factory for the header-only reader
+
+static gme_type_t_ const gme_kss_type_ = { "MSX", 256, &new_kss_emu, &new_kss_file, "KSS", 0x03 }; // field meanings are set by gme_type_t_ (declared elsewhere)
+BLARGG_EXPORT extern gme_type_t const gme_kss_type = &gme_kss_type_;
+
+
+// Setup
+
+void Kss_Emu::update_gain()
+{
+	// Every chip gets the same level: gain() * 1.4, times 1.5 once the SCC was written.
+	double const base = gain() * 1.4;
+	double const level = scc_accessed ? base * 1.5 : base;
+	ay.volume( level );
+	scc.volume( level );
+	if ( sn )
+		sn->volume( level );
+}
+
+blargg_err_t Kss_Emu::load_( Data_Reader& in ) // loads header + data, validates the tag, sets up chips
+{
+	memset( &header_, 0, sizeof header_ );
+	assert( offsetof (header_t,device_flags) == header_size - 1 ); // struct layout must match the file
+	assert( offsetof (ext_header_t,msx_audio_vol) == ext_header_size - 1 );
+	RETURN_ERR( rom.load( in, header_size, STATIC_CAST(header_t*,&header_), 0 ) );
+	
+	RETURN_ERR( check_kss_header( header_.tag ) );
+	
+	if ( header_.tag [3] == 'C' ) // "KSCC" tag: extra header and high flag bits are cleared with a warning
+	{
+		if ( header_.extra_header )
+		{
+			header_.extra_header = 0;
+			set_warning( "Unknown data in header" );
+		}
+		if ( header_.device_flags & ~0x0F )
+		{
+			header_.device_flags &= 0x0F;
+			set_warning( "Unknown data in header" );
+		}
+	}
+	else // "KSSX" tag: copy up to ext_header_size bytes of extended header from the loaded data
+	{
+		ext_header_t& ext = header_;
+		memcpy( &ext, rom.begin(), min( (int) ext_header_size, (int) header_.extra_header ) );
+		if ( header_.extra_header > 0x10 )
+			set_warning( "Unknown data in header" );
+	}
+	
+	if ( header_.device_flags & 0x09 )
+		set_warning( "FM sound not supported" );
+	
+	scc_enabled = 0xC000; // mask tested by kss_cpu_write(): (addr & mask) == 0x8000
+	if ( header_.device_flags & 0x04 )
+		scc_enabled = 0; // with a zero mask that test never passes, so cpu_write() is never reached
+	
+	if ( header_.device_flags & 0x02 && !sn ) // allocate the Sms_Apu once when flag 0x02 is set
+		CHECK_ALLOC( sn = BLARGG_NEW( Sms_Apu ) );
+	
+	set_voice_count( osc_count );
+	
+	return setup_buffer( ::clock_rate );
+}
+
+void Kss_Emu::update_eq( blip_eq_t const& eq ) // applies the same treble EQ to every chip
+{
+	ay.treble_eq( eq );
+	scc.treble_eq( eq );
+	if ( sn ) // the Sms_Apu exists only for files that requested it
+		sn->treble_eq( eq );
+}
+
+void Kss_Emu::set_voice( int i, Blip_Buffer* center, Blip_Buffer* left, Blip_Buffer* right )
+{
+	// Indices below ay.osc_count go to the AY; the rest map onto SCC oscillators.
+	if ( i < ay.osc_count )
+		ay.osc_output( i, center );
+	else
+		scc.osc_output( i - ay.osc_count, center );
+	if ( sn && i < sn->osc_count ) // the Sms_Apu, when present, reuses the low indices
+		sn->osc_output( i, center, left, right );
+}
+
+// Emulation
+
+void Kss_Emu::set_tempo_( double t )
+{
+	// device_flags bit 0x40 selects 50 play calls per second; otherwise 60.
+	int const rate = (header_.device_flags & 0x40) ? 50 : 60;
+	play_period = blip_time_t (blip_time_t (::clock_rate / rate) / t);
+}
+
+blargg_err_t Kss_Emu::start_track_( int track ) // rebuilds RAM, resets CPU and chips, starts at init_addr
+{
+	RETURN_ERR( Classic_Emu::start_track_( track ) );
+
+	memset( ram, 0xC9, 0x4000 ); // first 16K filled with $C9 (the Z80 RET opcode)
+	memset( ram + 0x4000, 0, sizeof ram - 0x4000 );
+	
+	// copy driver code to lo RAM
+	static byte const bios [] = {
+		0xD3, 0xA0, 0xF5, 0x7B, 0xD3, 0xA1, 0xF1, 0xC9, // $0001: WRTPSG
+		0xD3, 0xA0, 0xDB, 0xA2, 0xC9                    // $0009: RDPSG
+	};
+	static byte const vectors [] = {
+		0xC3, 0x01, 0x00,   // $0093: WRTPSG vector
+		0xC3, 0x09, 0x00,   // $0096: RDPSG vector
+	};
+	memcpy( ram + 0x01, bios,    sizeof bios );
+	memcpy( ram + 0x93, vectors, sizeof vectors );
+	
+	// copy non-banked data into RAM
+	unsigned load_addr = get_le16( header_.load_addr );
+	long orig_load_size = get_le16( header_.load_size );
+	long load_size = min( orig_load_size, rom.file_size() ); // clamp to the size rom reports
+	load_size = min( load_size, long (mem_size - load_addr) ); // and to the end of the 64K space
+	if ( load_size != orig_load_size )
+		set_warning( "Excessive data size" );
+	memcpy( ram + load_addr, rom.begin() + header_.extra_header, load_size ); // data follows the extra header
+	
+	rom.set_addr( -load_size - header_.extra_header );
+	
+	// check available bank data
+	blargg_long const bank_size = this->bank_size();
+	int max_banks = (rom.file_size() - load_size + bank_size - 1) / bank_size; // rounded up
+	bank_count = header_.bank_mode & 0x7F; // low 7 bits: bank count claimed by the header
+	if ( bank_count > max_banks )
+	{
+		bank_count = max_banks;
+		set_warning( "Bank data missing" );
+	}
+	//debug_printf( "load_size : $%X\n", load_size );
+	//debug_printf( "bank_size : $%X\n", bank_size );
+	//debug_printf( "bank_count: %d (%d claimed)\n", bank_count, header_.bank_mode & 0x7F );
+	
+	ram [idle_addr] = 0xFF;
+	cpu::reset( unmapped_write, unmapped_read );
+	cpu::map_mem( 0, mem_size, ram, ram ); // whole address space maps to ram for reads and writes
+	
+	ay.reset();
+	scc.reset();
+	if ( sn )
+		sn->reset();
+	r.sp = 0xF380;
+	ram [--r.sp] = idle_addr >> 8; // push idle_addr, high byte first; run_clocks() tests pc against it
+	ram [--r.sp] = idle_addr & 0xFF;
+	r.b.a = track; // track number is placed in register A
+	r.pc = get_le16( header_.init_addr );
+	next_play = play_period;
+	scc_accessed = false;
+	gain_updated = false; // lets run_clocks() re-apply the gain once after SCC use is seen
+	update_gain();
+	ay_latch = 0;
+	
+	return 0;
+}
+
+void Kss_Emu::set_bank( int logical, int physical ) // maps a ROM bank, or plain RAM, into a logical bank
+{
+	unsigned const bank_size = this->bank_size();
+	
+	unsigned addr = 0x8000;
+	if ( logical && bank_size == 8 * 1024 ) // with 8K banks, a non-zero logical bank starts at $A000
+		addr = 0xA000;
+	
+	physical -= header_.first_bank; // make the number relative to the header's first bank
+	if ( (unsigned) physical >= (unsigned) bank_count ) // out of range (negatives wrap high): map RAM
+	{
+		byte* data = ram + addr;
+		cpu::map_mem( addr, bank_size, data, data );
+	}
+	else
+	{
+		long phys = physical * (blargg_long) bank_size;
+		for ( unsigned offset = 0; offset < bank_size; offset += page_size ) // map one page at a time
+			cpu::map_mem( addr + offset, page_size,
+					unmapped_write, rom.at_addr( phys + offset ) ); // reads from rom, writes to unmapped_write
+	}
+}
+
+void Kss_Emu::cpu_write( unsigned addr, int data )
+{
+	// Handles a write forwarded by kss_cpu_write(): bank select or SCC register.
+	data &= 0xFF;
+	
+	// $9000 and $B000 select the physical bank for logical banks 0 and 1
+	if ( addr == 0x9000 || addr == 0xB000 )
+	{
+		set_bank( addr == 0xB000, data );
+		return;
+	}
+	
+	// clear bit 13 of the address, then XOR so register 0 sits at $9800
+	int const reg = (addr & 0xDFFF) ^ 0x9800;
+	if ( reg >= scc.reg_count )
+	{
+		debug_printf( "LD ($%04X),$%02X\n", addr, data );
+		return;
+	}
+	
+	// in range: record SCC use (read by update_gain()) and write the register
+	scc_accessed = true;
+	scc.write( time(), reg, data );
+}
+
+void kss_cpu_write( Kss_Cpu* cpu, unsigned addr, int data ) // memory write issued by the CPU core
+{
+	*cpu->write( addr ) = data; // always stored through the write page table
+	if ( (addr & STATIC_CAST(Kss_Emu&,*cpu).scc_enabled) == 0x8000 ) // scc_enabled is 0 or 0xC000
+		STATIC_CAST(Kss_Emu&,*cpu).cpu_write( addr, data ); // bank select / SCC register handling
+}
+
+void kss_cpu_out( Kss_Cpu* cpu, cpu_time_t time, unsigned addr, int data ) // port write from the CPU core
+{
+	data &= 0xFF;
+	Kss_Emu& emu = STATIC_CAST(Kss_Emu&,*cpu);
+	switch ( addr & 0xFF ) // only the low byte of the port address is decoded
+	{
+	case 0xA0:
+		emu.ay_latch = data & 0x0F; // selects the AY register for the next $A1 write
+		return;
+	
+	case 0xA1:
+		GME_APU_HOOK( &emu, emu.ay_latch, data );
+		emu.ay.write( time, emu.ay_latch, data );
+		return;
+	
+	case 0x06:
+		if ( emu.sn && (emu.header_.device_flags & 0x04) ) // needs the Sms_Apu and device flag 0x04
+		{
+			emu.sn->write_ggstereo( time, data );
+			return;
+		}
+		break;
+	
+	case 0x7E:
+	case 0x7F:
+		if ( emu.sn )
+		{
+			GME_APU_HOOK( &emu, 16, data );
+			emu.sn->write_data( time, data );
+			return;
+		}
+		break;
+	
+	case 0xFE:
+		emu.set_bank( 0, data );
+		return;
+	
+	#ifndef NDEBUG // these cases only silence the debug message below
+	case 0xF1: // FM data
+		if ( data )
+			break; // trap non-zero data
+	case 0xF0: // FM addr (a zero $F1 write falls through to here)
+	case 0xA8: // PPI
+		return;
+	#endif
+	}
+	
+	debug_printf( "OUT $%04X,$%02X\n", addr, data ); // reached for every port not handled above
+}
+
+int kss_cpu_in( Kss_Cpu*, cpu_time_t, unsigned addr ) // port read from the CPU core; no port is emulated
+{
+	//Kss_Emu& emu = STATIC_CAST(Kss_Emu&,*cpu);
+	//switch ( addr & 0xFF )
+	//{
+	//}
+	
+	debug_printf( "IN $%04X\n", addr );
+	return 0; // every port reads as zero
+}
+
+// Emulation
+
+blargg_err_t Kss_Emu::run_clocks( blip_time_t& duration, int ) // runs CPU and chips; duration returns the time reached
+{
+	while ( time() < duration )
+	{
+		blip_time_t const end = min( duration, next_play ); // stop at the next play call or frame end
+		cpu::run( end ); // same target as the set_time() below, computed once
+		if ( r.pc == idle_addr )
+			set_time( end ); // CPU is idle: skip straight to the target time
+		
+		if ( time() >= next_play )
+		{
+			next_play += play_period;
+			if ( r.pc == idle_addr ) // start the play routine only if the previous call returned
+			{
+				if ( !gain_updated )
+				{
+					gain_updated = true; // re-apply the gain at most once per track
+					if ( scc_accessed )
+						update_gain();
+				}
+				
+				ram [--r.sp] = idle_addr >> 8; // push idle_addr, high byte first
+				ram [--r.sp] = idle_addr & 0xFF;
+				r.pc = get_le16( header_.play_addr );
+				GME_FRAME_HOOK( this );
+			}
+		}
+	}
+	
+	duration = time(); // report the time actually reached
+	next_play -= duration; // rebase the schedule onto the next frame
+	check( next_play >= 0 );
+	adjust_time( -duration );
+	ay.end_frame( duration );
+	scc.end_frame( duration );
+	if ( sn )
+		sn->end_frame( duration );
+	
+	return 0;
+}
diff --git a/libraries/game-music-emu/gme/Kss_Emu.h b/libraries/game-music-emu/gme/Kss_Emu.h
new file mode 100644
index 000000000..467b28abd
--- /dev/null
+++ b/libraries/game-music-emu/gme/Kss_Emu.h
@@ -0,0 +1,95 @@
+// MSX computer KSS music file emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef KSS_EMU_H
+#define KSS_EMU_H
+
+#include "Classic_Emu.h"
+#include "Kss_Scc_Apu.h"
+#include "Kss_Cpu.h"
+#include "Sms_Apu.h"
+#include "Ay_Apu.h"
+
+class Kss_Emu : private Kss_Cpu, public Classic_Emu {
+	typedef Kss_Cpu cpu;
+public:
+	// KSS file header
+	enum { header_size = 0x10 };
+	struct header_t
+	{
+		byte tag [4]; // "KSCC" or "KSSX"
+		byte load_addr [2]; // the 2-byte fields are read with get_le16()
+		byte load_size [2];
+		byte init_addr [2];
+		byte play_addr [2];
+		byte first_bank;
+		byte bank_mode; // low 7 bits: bank count; bit 7: selects 8K instead of 16K banks
+		byte extra_header; // size in bytes of the extended header that precedes the data
+		byte device_flags;
+	};
+	
+	enum { ext_header_size = 0x10 };
+	struct ext_header_t
+	{
+		byte data_size [4];
+		byte unused [4];
+		byte first_track [2];
+		byte last_tack [2]; // sic: presumably "last track"; name kept because it is public
+		byte psg_vol;
+		byte scc_vol;
+		byte msx_music_vol;
+		byte msx_audio_vol;
+	};
+	
+	struct composite_header_t : header_t, ext_header_t { };
+	
+	// Header for currently loaded file
+	composite_header_t const& header() const { return header_; }
+	
+	static gme_type_t static_type() { return gme_kss_type; }
+public:
+	Kss_Emu();
+	~Kss_Emu();
+protected:
+	blargg_err_t track_info_( track_info_t*, int track ) const;
+	blargg_err_t load_( Data_Reader& );
+	blargg_err_t start_track_( int );
+	blargg_err_t run_clocks( blip_time_t&, int );
+	void set_tempo_( double );
+	void set_voice( int, Blip_Buffer*, Blip_Buffer*, Blip_Buffer* );
+	void update_eq( blip_eq_t const& );
+	void unload();
+private:
+	Rom_Data<page_size> rom;
+	composite_header_t header_;
+	
+	bool scc_accessed; // set by cpu_write() on the first SCC register write
+	bool gain_updated; // set by run_clocks() once it has handled the gain re-check
+	void update_gain();
+	
+	unsigned scc_enabled; // 0 or 0xC000
+	int bank_count;
+	void set_bank( int logical, int physical );
+	blargg_long bank_size() const { return (16 * 1024L) >> (header_.bank_mode >> 7 & 1); } // 16K, or 8K if bit 7 set
+	
+	blip_time_t play_period; // clocks between play-routine calls
+	blip_time_t next_play; // time of the next play-routine call
+	int ay_latch; // AY register selected by the last write to port $A0
+	
+	friend void kss_cpu_out( class Kss_Cpu*, cpu_time_t, unsigned addr, int data );
+	friend int  kss_cpu_in( class Kss_Cpu*, cpu_time_t, unsigned addr );
+	void cpu_write( unsigned addr, int data );
+	friend void kss_cpu_write( class Kss_Cpu*, unsigned addr, int data );
+	
+	// large items
+	enum { mem_size = 0x10000 };
+	byte ram [mem_size + cpu_padding];
+	
+	Ay_Apu ay;
+	Scc_Apu scc;
+	Sms_Apu* sn; // null unless load_() allocated it
+	byte unmapped_read  [0x100]; // filled with $FF by the constructor
+	byte unmapped_write [page_size]; // write target used for pages mapped from rom
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Kss_Scc_Apu.cpp b/libraries/game-music-emu/gme/Kss_Scc_Apu.cpp
new file mode 100644
index 000000000..bb84b3250
--- /dev/null
+++ b/libraries/game-music-emu/gme/Kss_Scc_Apu.cpp
@@ -0,0 +1,97 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Kss_Scc_Apu.h"
+
+/* Copyright (C) 2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+// Tones above this frequency are treated as disabled tone at half volume.
+// Power of two is more efficient (avoids division).
+unsigned const inaudible_freq = 16384;
+
+int const wave_size = 0x20; // bytes per waveform; phase values are masked with wave_size - 1
+
+void Scc_Apu::run_until( blip_time_t end_time ) // renders every oscillator from last_time up to end_time
+{
+	for ( int index = 0; index < osc_count; index++ )
+	{
+		osc_t& osc = oscs [index];
+		
+		Blip_Buffer* const output = osc.output;
+		if ( !output ) // muted oscillator: nothing is generated
+			continue;
+		output->set_modified();
+		
+		blip_time_t period = (regs [0x80 + index * 2 + 1] & 0x0F) * 0x100 +
+				regs [0x80 + index * 2] + 1; // 12-bit value from two registers, plus one
+		int volume = 0;
+		if ( regs [0x8F] & (1 << index) ) // register $8F holds one enable bit per oscillator
+		{
+			blip_time_t inaudible_period = (blargg_ulong) (output->clock_rate() +
+					inaudible_freq * 32) / (inaudible_freq * 16);
+			if ( period > inaudible_period ) // shorter periods leave volume at 0
+				volume = (regs [0x8A + index] & 0x0F) * (amp_range / 256 / 15);
+		}
+		
+		int8_t const* wave = (int8_t*) regs + index * wave_size; // waveform bytes are read as signed
+		if ( index == osc_count - 1 )
+			wave -= wave_size; // last two oscs share wave
+		{
+			int amp = wave [osc.phase] * volume; // bring the output level up to date at last_time
+			int delta = amp - osc.last_amp;
+			if ( delta )
+			{
+				osc.last_amp = amp;
+				synth.offset( last_time, delta, output );
+			}
+		}
+		
+		blip_time_t time = last_time + osc.delay; // time of this oscillator's next phase step
+		if ( time < end_time )
+		{
+			if ( !volume )
+			{
+				// maintain phase
+				blargg_long count = (end_time - time + period - 1) / period; // steps needed, rounded up
+				osc.phase = (osc.phase + count) & (wave_size - 1);
+				time += count * period;
+			}
+			else
+			{
+				
+				int phase = osc.phase;
+				int last_wave = wave [phase];
+				phase = (phase + 1) & (wave_size - 1); // pre-advance for optimal inner loop
+				
+				do
+				{
+					int amp = wave [phase];
+					phase = (phase + 1) & (wave_size - 1);
+					int delta = amp - last_wave;
+					if ( delta ) // only changes in sample value are added to the buffer
+					{
+						last_wave = amp;
+						synth.offset( time, delta * volume, output );
+					}
+					time += period;
+				}
+				while ( time < end_time );
+				
+				osc.phase = phase = (phase - 1) & (wave_size - 1); // undo pre-advance
+				osc.last_amp = wave [phase] * volume;
+			}
+		}
+		osc.delay = time - end_time; // clocks left until the next step, carried to the next call
+	}
+	last_time = end_time;
+}
diff --git a/libraries/game-music-emu/gme/Kss_Scc_Apu.h b/libraries/game-music-emu/gme/Kss_Scc_Apu.h
new file mode 100644
index 000000000..eda5747fe
--- /dev/null
+++ b/libraries/game-music-emu/gme/Kss_Scc_Apu.h
@@ -0,0 +1,106 @@
+// Konami SCC sound chip emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef KSS_SCC_APU_H
+#define KSS_SCC_APU_H
+
+#include "blargg_common.h"
+#include "Blip_Buffer.h"
+#include <string.h>
+
+class Scc_Apu {
+public:
+	// Set buffer to generate all sound into, or disable sound if NULL
+	void output( Blip_Buffer* );
+	
+	// Reset sound chip
+	void reset();
+	
+	// Write to register at specified time
+	enum { reg_count = 0x90 };
+	void write( blip_time_t time, int reg, int data );
+	
+	// Run sound to specified time, end current time frame, then start a new
+	// time frame at time 0. Time frames have no effect on emulation and each
+	// can be whatever length is convenient.
+	void end_frame( blip_time_t length );
+
+// Additional features
+	
+	// Set sound output of specific oscillator to buffer, where index is
+	// 0 to 4. If buffer is NULL, the specified oscillator is muted.
+	enum { osc_count = 5 };
+	void osc_output( int index, Blip_Buffer* );
+	
+	// Set overall volume (default is 1.0)
+	void volume( double );
+	
+	// Set treble equalization (see documentation)
+	void treble_eq( blip_eq_t const& );
+	
+public:
+	Scc_Apu();
+private:
+	enum { amp_range = 0x8000 };
+	struct osc_t
+	{
+		int delay; // clocks from last_time until the next phase step
+		int phase; // current index into the waveform
+		int last_amp; // amplitude most recently added to the buffer
+		Blip_Buffer* output; // kept last: reset() clears only the fields before it
+	};
+	osc_t oscs [osc_count];
+	blip_time_t last_time; // time up to which run_until() has rendered
+	unsigned char regs [reg_count];
+	Blip_Synth<blip_med_quality,1> synth;
+	
+	void run_until( blip_time_t );
+};
+
+inline void Scc_Apu::volume( double v ) { synth.volume( 0.43 / osc_count / amp_range * v ); } // v scaled by 0.43 / osc_count / amp_range
+
+inline void Scc_Apu::treble_eq( blip_eq_t const& eq ) { synth.treble_eq( eq ); } // forwarded to the shared synth
+
+inline void Scc_Apu::osc_output( int index, Blip_Buffer* b ) // a null buffer mutes the oscillator
+{
+	assert( (unsigned) index < osc_count ); // the unsigned cast also rejects negative indices
+	oscs [index].output = b;
+}
+
+inline void Scc_Apu::write( blip_time_t time, int addr, int data ) // register write at the given time
+{
+	assert( (unsigned) addr < reg_count ); // the unsigned cast also rejects negative addresses
+	run_until( time ); // render with the old register value up to the write time
+	regs [addr] = data;
+}
+
+inline void Scc_Apu::end_frame( blip_time_t end_time ) // finishes the frame and rebases time to zero
+{
+	if ( end_time > last_time ) // render whatever part of the frame is still pending
+		run_until( end_time );
+	last_time -= end_time; // the next frame starts at time 0
+	assert( last_time >= 0 );
+}
+
+inline void Scc_Apu::output( Blip_Buffer* buf ) // routes every oscillator to buf (null mutes all)
+{
+	for ( int i = 0; i < osc_count; i++ )
+		oscs [i].output = buf;
+}
+
+inline Scc_Apu::Scc_Apu() // only clears the outputs; other state is set by reset()
+{
+	output( 0 );
+}
+
+inline void Scc_Apu::reset() // clears timing, oscillator state and registers; outputs are kept
+{
+	last_time = 0;
+	
+	for ( int i = 0; i < osc_count; i++ )
+		memset( &oscs [i], 0, offsetof (osc_t,output) ); // zero the fields that precede output
+	
+	memset( regs, 0, sizeof regs );
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/M3u_Playlist.cpp b/libraries/game-music-emu/gme/M3u_Playlist.cpp
new file mode 100644
index 000000000..e751d4cc8
--- /dev/null
+++ b/libraries/game-music-emu/gme/M3u_Playlist.cpp
@@ -0,0 +1,426 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "M3u_Playlist.h"
+#include "Music_Emu.h"
+
+#include <string.h>
+
+/* Copyright (C) 2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+// gme functions defined here to avoid linking in m3u code unless it's used
+
+blargg_err_t Gme_File::load_m3u_( blargg_err_t err ) // finishes a playlist load; err is the loader's result
+{
+	require( raw_track_count_ ); // file must be loaded first
+	
+	if ( !err )
+	{
+		if ( playlist.size() )
+			track_count_ = playlist.size(); // a non-empty playlist sets the track count
+		
+		int line = playlist.first_error();
+		if ( line )
+		{
+			// avoid using bloated printf()
+			char* out = &playlist_warning [sizeof playlist_warning]; // text is built backwards from the end
+			*--out = 0;
+			do {
+				*--out = line % 10 + '0'; // decimal digits, least significant first
+			} while ( (line /= 10) > 0 );
+			
+			static const char str [] = "Problem in m3u at line ";
+			out -= sizeof str - 1;
+			memcpy( out, str, sizeof str - 1 ); // prefix copied without its terminator
+			set_warning( out );
+		}
+	}
+	return err;
+}
+
+blargg_err_t Gme_File::load_m3u( const char* path ) { return load_m3u_( playlist.load( path ) ); } // load by file path
+
+blargg_err_t Gme_File::load_m3u( Data_Reader& in )  { return load_m3u_( playlist.load( in ) ); } // load from a reader
+
+BLARGG_EXPORT gme_err_t gme_load_m3u( Music_Emu* me, const char* path ) { return me->load_m3u( path ); } // exported wrapper
+
+BLARGG_EXPORT gme_err_t gme_load_m3u_data( Music_Emu* me, const void* data, long size ) // playlist from memory
+{
+	Mem_File_Reader in( data, size ); // wraps the caller's buffer in a reader
+	return me->load_m3u( in );
+}
+
+
+
+static char* skip_white( char* in )
+{
+	// Only the space character is skipped; tabs and other characters stop the scan.
+	for ( ; *in == ' '; in++ ) { }
+	return in;
+}
+
+inline unsigned from_dec( unsigned n ) { return n - '0'; } // 0-9 for a digit; wraps to a larger value otherwise
+
+static char* parse_filename( char* in, M3u_Playlist::entry_t& entry ) // unescapes in place; returns start of next field
+{
+	entry.file = in;
+	entry.type = "";
+	char* out = in; // write position; never ahead of the read position
+	while ( 1 )
+	{
+		int c = *in;
+		if ( !c ) break;
+		in++;
+		
+		if ( c == ',' ) // commas in filename
+		{
+			char* p = skip_white( in );
+			if ( *p == '$' || from_dec( *p ) <= 9 ) // a comma ends the name only before '$' or a digit
+			{
+				in = p;
+				break;
+			}
+		}
+		
+		if ( c == ':' && in [0] == ':' && in [1] && in [2] != ',' ) // ::type suffix
+		{
+			entry.type = ++in;
+			while ( (c = *in) != 0 && c != ',' )
+				in++;
+			if ( c == ',' )
+			{
+				*in++ = 0; // terminate type
+				in = skip_white( in );
+			}
+			break;
+		}
+		
+		if ( c == '\\' ) // \ prefix for special characters
+		{
+			c = *in; // the character after the backslash is copied literally
+			if ( !c ) break;
+			in++;
+		}
+		*out++ = (char) c;
+	}
+	*out = 0; // terminate string
+	return in;
+}
+
+static char* next_field( char* in, int* result ) // skips to just past the next comma, or to end of line
+{
+	while ( 1 )
+	{
+		in = skip_white( in );
+		
+		if ( !*in )
+			break;
+		
+		if ( *in == ',' )
+		{
+			in++;
+			break;
+		}
+		
+		*result = 1; // any other character before the comma marks the line as bad
+		in++;
+	}
+	return skip_white( in );
+}
+
+static char* parse_int_( char* in, int* out )
+{
+	// Parses a run of decimal digits at in and returns the first non-digit.
+	// *out is written only when at least one digit is present. The value is
+	// accumulated unsigned so an overlong run wraps rather than overflowing a
+	// signed int (undefined behavior).
+	unsigned n = 0;
+	for ( unsigned d; (d = from_dec( *in )) <= 9; in++ )
+	{
+		n = n * 10 + d;
+		*out = (int) n;
+	}
+	return in;
+}
+
+static char* parse_int( char* in, int* out, int* result ) // integer field, then advance to the next field
+{
+	return next_field( parse_int_( in, out ), result ); // text left after the digits sets *result
+}
+
+// Returns 16 or greater if not hex
+inline int from_hex_char( int h )
+{
+	int const digit = h - 0x30;
+	if ( (unsigned) digit <= 9 )
+		return digit;
+	return ((digit - 0x11) & 0xDF) + 10; // clearing bit 5 folds 'a'-'f' onto 'A'-'F'
+}
+
+static char* parse_track( char* in, M3u_Playlist::entry_t& entry, int* result )
+{
+	// Parses the track field: '$' followed by hex digits, or plain decimal digits.
+	// entry.track is left unchanged when no digit is present.
+	if ( *in == '$' )
+	{
+		in++;
+		// unsigned accumulator: an overlong digit run wraps instead of
+		// overflowing a signed int, which would be undefined behavior
+		unsigned n = 0;
+		for ( int h; (h = from_hex_char( *in )) <= 15; in++ )
+		{
+			n = n * 16 + h;
+			entry.track = (int) n;
+		}
+	}
+	else
+	{
+		in = parse_int_( in, &entry.track );
+		if ( entry.track >= 0 )
+			entry.decimal_track = 1; // flag is set only on this decimal path
+	}
+	return next_field( in, result );
+}
+
+static char* parse_time_( char* in, int* out ) // parses "N" or "N:M"; *out is -1 if no digits are found
+{
+	*out = -1;
+	int n = -1; // stays -1 when parse_int_() finds no digit
+	in = parse_int_( in, &n );
+	if ( n >= 0 )
+	{
+		*out = n;
+		if ( *in == ':' )
+		{
+			n = -1;
+			in = parse_int_( in + 1, &n );
+			if ( n >= 0 )
+				*out = *out * 60 + n; // first number scaled by 60, second added
+		}
+	}
+	return in;
+}
+
+static char* parse_time( char* in, int* out, int* result ) // time field, then advance to the next field
+{
+	return next_field( parse_time_( in, out ), result ); // text left after the time sets *result
+}
+
+static char* parse_name( char* in ) // unescapes the name in place; returns the start of the next field
+{
+	char* out = in; // write position; never ahead of the read position
+	while ( 1 )
+	{
+		int c = *in;
+		if ( !c ) break;
+		in++;
+		
+		if ( c == ',' ) // commas in string
+		{
+			char* p = skip_white( in );
+			if ( *p == ',' || *p == '-' || from_dec( *p ) <= 9 ) // comma ends the name only before ',', '-' or a digit
+			{
+				in = p;
+				break;
+			}
+		}
+		
+		if ( c == '\\' ) // \ prefix for special characters
+		{
+			c = *in; // the character after the backslash is copied literally
+			if ( !c ) break;
+			in++;
+		}
+		*out++ = (char) c;
+	}
+	*out = 0; // terminate string
+	return in;
+}
+
+static int parse_line( char* in, M3u_Playlist::entry_t& entry ) // fills entry; returns non-zero on a malformed field
+{
+	int result = 0; // set to 1 by next_field() when stray text precedes a comma
+	
+	// file
+	entry.file = in;
+	entry.type = "";
+	in = parse_filename( in, entry );
+	
+	// track
+	entry.track = -1;
+	entry.decimal_track = 0;
+	in = parse_track( in, entry, &result );
+	
+	// name
+	entry.name = in;
+	in = parse_name( in );
+	
+	// time
+	entry.length = -1;
+	in = parse_time( in, &entry.length, &result );
+	
+	// loop
+	entry.intro = -1;
+	entry.loop  = -1;
+	if ( *in == '-' ) // a lone '-' makes the loop span the whole length
+	{
+		entry.loop = entry.length;
+		in++;
+	}
+	else
+	{
+		in = parse_time_( in, &entry.loop );
+		if ( entry.loop >= 0 )
+		{
+			entry.intro = 0;
+			if ( *in == '-' ) // trailing '-' means that intro length was specified
+			{
+				in++;
+				entry.intro = entry.loop;
+				entry.loop  = entry.length - entry.intro; // loop is what remains after the intro
+			}
+		}
+	}
+	in = next_field( in, &result );
+	
+	// fade
+	entry.fade = -1;
+	in = parse_time( in, &entry.fade, &result );
+	
+	// repeat
+	entry.repeat = -1;
+	in = parse_int( in, &entry.repeat, &result );
+	
+	return result;
+}
+
+static void parse_comment( char* in, M3u_Playlist::info_t& info, bool first ) // reads "# Field: text" lines
+{
+	in = skip_white( in + 1 ); // skip the leading '#'
+	const char* field = in;
+	while ( *in && *in != ':' )
+		in++;
+	
+	if ( *in == ':' )
+	{
+		const char* text = skip_white( in + 1 );
+		if ( *text )
+		{
+			*in = 0; // temporarily terminate the field name for strcmp()
+			     if ( !strcmp( "Composer", field ) ) info.composer = text;
+			else if ( !strcmp( "Engineer", field ) ) info.engineer = text;
+			else if ( !strcmp( "Ripping" , field ) ) info.ripping  = text;
+			else if ( !strcmp( "Tagging" , field ) ) info.tagging  = text;
+			else
+				text = 0;
+			if ( text )
+				return;
+			*in = ':'; // unknown field name: restore the colon
+		}
+	}
+	
+	if ( first ) // an unrecognized comment is used as the title only when first is set
+		info.title = field;
+}
+
+blargg_err_t M3u_Playlist::parse_() // splits data into lines in place and fills entries and info_
+{
+	info_.title    = "";
+	info_.composer = "";
+	info_.engineer = "";
+	info_.ripping  = "";
+	info_.tagging  = "";
+	
+	int const CR = 13;
+	int const LF = 10;
+	
+	data.end() [-1] = LF; // terminate input
+	
+	first_error_ = 0;
+	bool first_comment = true;
+	int line  = 0;
+	int count = 0; // number of entries parsed without error
+	char* in  = data.begin();
+	while ( in < data.end() )
+	{
+		// find end of line and terminate it
+		line++;
+		char* begin = in;
+		while ( *in != CR && *in != LF )
+		{
+			if ( !*in ) // an embedded NUL rejects the whole input
+				return "Not an m3u playlist";
+			in++;
+		}
+		if ( in [0] == CR && in [1] == LF ) // treat CR,LF as a single line
+			*in++ = 0;
+		*in++ = 0;
+		
+		// parse line
+		if ( *begin == '#' )
+		{
+			parse_comment( begin, info_, first_comment );
+			first_comment = false;
+		}
+		else if ( *begin ) // empty lines are skipped
+		{
+			if ( (int) entries.size() <= count )
+				RETURN_ERR( entries.resize( count * 2 + 64 ) ); // grow before writing entry [count]
+			
+			if ( !parse_line( begin, entries [count] ) )
+				count++;
+			else if ( !first_error_ ) // a bad line is dropped; only the first bad line number is kept
+				first_error_ = line;
+			first_comment = false;
+		}
+	}
+	if ( count <= 0 )
+		return "Not an m3u playlist";
+	
+	if ( !(info_.composer [0] | info_.engineer [0] | info_.ripping [0] | info_.tagging [0]) )
+		info_.title = ""; // a title is kept only if at least one other info field was found
+	
+	return entries.resize( count ); // trim to the entries actually parsed
+}
+
+blargg_err_t M3u_Playlist::parse()
+{
+	// Run the parser; on failure discard both the entries and the raw text.
+	blargg_err_t const err = parse_();
+	if ( !err )
+		return err;
+	entries.clear();
+	data.clear();
+	return err;
+}
+
+blargg_err_t M3u_Playlist::load( Data_Reader& in ) // reads everything left in the reader, then parses
+{
+	RETURN_ERR( data.resize( in.remain() + 1 ) ); // one extra byte; parse_() stores LF in it
+	RETURN_ERR( in.read( data.begin(), data.size() - 1 ) );
+	return parse();
+}
+
+blargg_err_t M3u_Playlist::load( const char* path ) // opens the file at path and loads from it
+{
+	GME_FILE_READER in;
+	RETURN_ERR( in.open( path ) );
+	return load( in ); // continues in the Data_Reader overload
+}
+
+blargg_err_t M3u_Playlist::load( void const* in, long size ) // copies size bytes from memory, then parses
+{
+	RETURN_ERR( data.resize( size + 1 ) ); // one extra byte; parse_() stores LF in it
+	memcpy( data.begin(), in, size );
+	return parse();
+}
diff --git a/libraries/game-music-emu/gme/M3u_Playlist.h b/libraries/game-music-emu/gme/M3u_Playlist.h
new file mode 100644
index 000000000..6757b7cfb
--- /dev/null
+++ b/libraries/game-music-emu/gme/M3u_Playlist.h
@@ -0,0 +1,67 @@
+// M3U playlist file parser, with support for subtrack information
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef M3U_PLAYLIST_H
+#define M3U_PLAYLIST_H
+
+#include "blargg_common.h"
+#include "Data_Reader.h"
+
+class M3u_Playlist {
+public:
+	// Load playlist data
+	blargg_err_t load( const char* path );
+	blargg_err_t load( Data_Reader& in );
+	blargg_err_t load( void const* data, long size );
+	
+	// Line number of first parse error, 0 if no error. Any lines with parse
+	// errors are ignored.
+	int first_error() const { return first_error_; }
+	
+	struct info_t
+	{
+		const char* title;
+		const char* composer;
+		const char* engineer;
+		const char* ripping;
+		const char* tagging;
+	};
+	info_t const& info() const { return info_; }
+	
+	struct entry_t
+	{
+		const char* file; // filename without the ::TYPE suffix
+		const char* type; // if filename has ::TYPE suffix, this will be "TYPE". "" if none.
+		const char* name;
+		bool decimal_track; // true if track was specified in decimal rather than $hex
+		// integers are -1 if not present
+		int track;  // 1-based
+		int length; // seconds
+		int intro;
+		int loop;
+		int fade;
+		int repeat; // count
+	};
+	entry_t const& operator [] ( int i ) const { return entries [i]; }
+	int size() const { return entries.size(); }
+	
+	void clear();
+	
+private:
+	blargg_vector<entry_t> entries;
+	blargg_vector<char> data; // raw playlist text; the parser stores pointers into it in entries and info_
+	int first_error_;
+	info_t info_;
+	
+	blargg_err_t parse();
+	blargg_err_t parse_();
+};
+
+inline void M3u_Playlist::clear() // drops all entries, the raw text and the recorded error line
+{
+	first_error_ = 0;
+	entries.clear();
+	data.clear();
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Multi_Buffer.cpp b/libraries/game-music-emu/gme/Multi_Buffer.cpp
new file mode 100644
index 000000000..5f000ceeb
--- /dev/null
+++ b/libraries/game-music-emu/gme/Multi_Buffer.cpp
@@ -0,0 +1,232 @@
+// Blip_Buffer 0.4.1. http://www.slack.net/~ant/
+
+#include "Multi_Buffer.h"
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+#ifdef BLARGG_ENABLE_OPTIMIZER
+	#include BLARGG_ENABLE_OPTIMIZER
+#endif
+
+Multi_Buffer::Multi_Buffer( int spf ) : // spf = samples per output frame
+	channels_changed_count_( 1 ),
+	sample_rate_( 0 ),
+	length_( 0 ),
+	samples_per_frame_( spf )
+{ }
+
+blargg_err_t Multi_Buffer::set_channel_count( int ) { return 0; } // base version ignores the count and returns 0 (no error)
+
+// Silent_Buffer
+
+Silent_Buffer::Silent_Buffer() : Multi_Buffer( 1 ) // 0 channels would probably confuse
+{
+	// TODO: better to use empty Blip_Buffer so caller never has to check for NULL?
+	chan.left   = 0; // all three outputs are null pointers: there is no buffer to write into
+	chan.center = 0;
+	chan.right  = 0;
+}
+
+// Mono_Buffer
+
+Mono_Buffer::Mono_Buffer() : Multi_Buffer( 1 ) // one sample per output frame
+{
+	chan.center = &buf; // center, left and right all alias the single buffer
+	chan.left   = &buf;
+	chan.right  = &buf;
+}
+
+Mono_Buffer::~Mono_Buffer() { } // empty: no explicit cleanup is performed here
+
+blargg_err_t Mono_Buffer::set_sample_rate( long rate, int msec )
+{
+	RETURN_ERR( buf.set_sample_rate( rate, msec ) ); // configure the buffer first (RETURN_ERR presumably returns early on failure)
+	return Multi_Buffer::set_sample_rate( buf.sample_rate(), buf.length() ); // record what the buffer reports, not the raw request
+}
+
+// Stereo_Buffer
+
+Stereo_Buffer::Stereo_Buffer() : Multi_Buffer( 2 ), stereo_added( 0 ), was_stereo( false )
+{	// stereo flags start cleared: read_samples() reads both, and they were indeterminate until clear() ran
+	chan.center = &bufs [0];
+	chan.left = &bufs [1];
+	chan.right = &bufs [2];
+}
+
+Stereo_Buffer::~Stereo_Buffer() { } // empty: no explicit cleanup is performed here
+
+blargg_err_t Stereo_Buffer::set_sample_rate( long rate, int msec )
+{
+	for ( int i = 0; i < buf_count; i++ ) // same rate and length for center, left and right
+		RETURN_ERR( bufs [i].set_sample_rate( rate, msec ) );
+	return Multi_Buffer::set_sample_rate( bufs [0].sample_rate(), bufs [0].length() ); // record the values reported by the center buffer
+}
+
+void Stereo_Buffer::clock_rate( long rate ) // forwards the clock rate to all three buffers
+{
+	for ( Blip_Buffer* b = bufs; b != bufs + buf_count; ++b )
+		b->clock_rate( rate );
+}
+
+void Stereo_Buffer::bass_freq( int bass ) // forwards the bass frequency to all three buffers
+{
+	for ( Blip_Buffer* b = bufs; b != bufs + buf_count; ++b )
+		b->bass_freq( bass );
+}
+
+void Stereo_Buffer::clear() // forget stereo history and empty all three buffers
+{
+	was_stereo   = false;
+	stereo_added = 0;
+	for ( Blip_Buffer* b = bufs; b != bufs + buf_count; ++b )
+		b->clear();
+}
+
+void Stereo_Buffer::end_frame( blip_time_t clock_count )
+{
+	stereo_added = 0; // rebuilt below from this frame's buffers
+	for ( int i = 0; i < buf_count; i++ )
+	{
+		stereo_added |= bufs [i].clear_modified() << i; // bit i <- buffer i's modified flag (0 center, 1 left, 2 right); presumably a 0/1 value
+		bufs [i].end_frame( clock_count );
+	}
+}
+
+long Stereo_Buffer::read_samples( blip_sample_t* out, long count )
+{
+	require( !(count & 1) ); // count must be even
+	count = (unsigned) count / 2; // from here on, count is in stereo pairs
+	
+	long avail = bufs [0].samples_avail();
+	if ( count > avail )
+		count = avail; // never read more pairs than the center buffer holds
+	if ( count )
+	{
+		int bufs_used = stereo_added | was_stereo; // bit 0 center, bit 1 left, bit 2 right (built in end_frame)
+		//debug_printf( "%X\n", bufs_used );
+		if ( bufs_used <= 1 ) // neither side buffer was used: center only
+		{
+			mix_mono( out, count );
+			bufs [0].remove_samples( count );
+			bufs [1].remove_silence( count );
+			bufs [2].remove_silence( count );
+		}
+		else if ( bufs_used & 1 ) // center plus at least one side buffer
+		{
+			mix_stereo( out, count );
+			bufs [0].remove_samples( count );
+			bufs [1].remove_samples( count );
+			bufs [2].remove_samples( count );
+		}
+		else // side buffers only, center unused
+		{
+			mix_stereo_no_center( out, count );
+			bufs [0].remove_silence( count );
+			bufs [1].remove_samples( count );
+			bufs [2].remove_samples( count );
+		}
+		
+		// to do: this might miss opportunities for optimization
+		if ( !bufs [0].samples_avail() ) // center drained: carry this frame's flags over as history
+		{
+			was_stereo   = stereo_added;
+			stereo_added = 0;
+		}
+	}
+	
+	return count * 2; // pairs back to individual samples
+}
+
+void Stereo_Buffer::mix_stereo( blip_sample_t* out_, blargg_long count ) // writes count left/right pairs: center+left, center+right
+{
+	blip_sample_t* BLIP_RESTRICT out = out_;
+	int const bass = BLIP_READER_BASS( bufs [1] ); // the left buffer's bass value is used for all three readers
+	BLIP_READER_BEGIN( left, bufs [1] );
+	BLIP_READER_BEGIN( right, bufs [2] );
+	BLIP_READER_BEGIN( center, bufs [0] );
+	
+	for ( ; count; --count )
+	{
+		int c = BLIP_READER_READ( center );
+		blargg_long l = c + BLIP_READER_READ( left );
+		blargg_long r = c + BLIP_READER_READ( right );
+		if ( (int16_t) l != l ) // does not fit in 16 bits
+			l = 0x7FFF - (l >> 24); // clamp: 0x7FFF when positive, 0x8000 (-32768 as int16) when negative, assuming |l| < 2^24
+		
+		BLIP_READER_NEXT( center, bass );
+		if ( (int16_t) r != r )
+			r = 0x7FFF - (r >> 24); // same clamp for the right sample
+		
+		BLIP_READER_NEXT( left, bass );
+		BLIP_READER_NEXT( right, bass );
+		
+		out [0] = l;
+		out [1] = r;
+		out += 2;
+	}
+	
+	BLIP_READER_END( center, bufs [0] );
+	BLIP_READER_END( right, bufs [2] );
+	BLIP_READER_END( left, bufs [1] );
+}
+
+void Stereo_Buffer::mix_stereo_no_center( blip_sample_t* out_, blargg_long count ) // writes count left/right pairs; center is not read
+{
+	blip_sample_t* BLIP_RESTRICT out = out_;
+	int const bass = BLIP_READER_BASS( bufs [1] ); // the left buffer's bass value is used for both readers
+	BLIP_READER_BEGIN( left, bufs [1] );
+	BLIP_READER_BEGIN( right, bufs [2] );
+	
+	for ( ; count; --count )
+	{
+		blargg_long l = BLIP_READER_READ( left );
+		if ( (int16_t) l != l ) // does not fit in 16 bits
+			l = 0x7FFF - (l >> 24); // clamp: 0x7FFF when positive, 0x8000 (-32768 as int16) when negative, assuming |l| < 2^24
+		
+		blargg_long r = BLIP_READER_READ( right );
+		if ( (int16_t) r != r )
+			r = 0x7FFF - (r >> 24); // same clamp for the right sample
+		
+		BLIP_READER_NEXT( left, bass );
+		BLIP_READER_NEXT( right, bass );
+		
+		out [0] = l;
+		out [1] = r;
+		out += 2;
+	}
+	
+	BLIP_READER_END( right, bufs [2] );
+	BLIP_READER_END( left, bufs [1] );
+}
+
+void Stereo_Buffer::mix_mono( blip_sample_t* out_, blargg_long count ) // writes count pairs, each holding the center sample twice
+{
+	blip_sample_t* BLIP_RESTRICT out = out_;
+	int const bass = BLIP_READER_BASS( bufs [0] );
+	BLIP_READER_BEGIN( center, bufs [0] );
+	
+	for ( ; count; --count )
+	{
+		blargg_long s = BLIP_READER_READ( center );
+		if ( (int16_t) s != s ) // does not fit in 16 bits
+			s = 0x7FFF - (s >> 24); // clamp: 0x7FFF when positive, 0x8000 (-32768 as int16) when negative, assuming |s| < 2^24
+		
+		BLIP_READER_NEXT( center, bass );
+		out [0] = s; // left
+		out [1] = s; // right
+		out += 2;
+	}
+	
+	BLIP_READER_END( center, bufs [0] );
+}
diff --git a/libraries/game-music-emu/gme/Multi_Buffer.h b/libraries/game-music-emu/gme/Multi_Buffer.h
new file mode 100644
index 000000000..82c8b3ab5
--- /dev/null
+++ b/libraries/game-music-emu/gme/Multi_Buffer.h
@@ -0,0 +1,158 @@
+// Multi-channel sound buffer interface, and basic mono and stereo buffers
+
+// Blip_Buffer 0.4.1
+#ifndef MULTI_BUFFER_H
+#define MULTI_BUFFER_H
+
+#include "blargg_common.h"
+#include "Blip_Buffer.h"
+
+// Interface to one or more Blip_Buffers mapped to one or more channels
+// consisting of left, center, and right buffers.
+class Multi_Buffer {
+public:
+	Multi_Buffer( int samples_per_frame ); // value is fixed for the object's lifetime (stored in a const member)
+	virtual ~Multi_Buffer() { }
+	
+	// Set the number of channels available
+	virtual blargg_err_t set_channel_count( int );
+	
+	// Get indexed channel, from 0 to channel count - 1
+	struct channel_t {
+		Blip_Buffer* center;
+		Blip_Buffer* left;
+		Blip_Buffer* right;
+	};
+	enum { type_index_mask = 0xFF };
+	enum { wave_type = 0x100, noise_type = 0x200, mixed_type = wave_type | noise_type };
+	virtual channel_t channel( int index, int type ) = 0;
+	
+	// See Blip_Buffer.h
+	virtual blargg_err_t set_sample_rate( long rate, int msec = blip_default_length ) = 0; // pure, but a base definition exists that stores rate and msec
+	virtual void clock_rate( long ) = 0;
+	virtual void bass_freq( int ) = 0;
+	virtual void clear() = 0;
+	long sample_rate() const;
+	
+	// Length of buffer, in milliseconds
+	int length() const;
+	
+	// See Blip_Buffer.h
+	virtual void end_frame( blip_time_t ) = 0;
+	
+	// Number of samples per output frame (1 = mono, 2 = stereo)
+	int samples_per_frame() const;
+	
+	// Count of changes to channel configuration. Incremented whenever
+	// a change is made to any of the Blip_Buffers for any channel.
+	unsigned channels_changed_count() { return channels_changed_count_; }
+	
+	// See Blip_Buffer.h
+	virtual long read_samples( blip_sample_t*, long ) = 0;
+	virtual long samples_avail() const = 0;
+	
+public:
+	BLARGG_DISABLE_NOTHROW
+protected:
+	void channels_changed() { channels_changed_count_++; } // for derived classes to report a channel change
+private:
+	// noncopyable
+	Multi_Buffer( const Multi_Buffer& );
+	Multi_Buffer& operator = ( const Multi_Buffer& );
+	
+	unsigned channels_changed_count_;
+	long sample_rate_; // last value passed to the base set_sample_rate()
+	int length_;       // last msec value passed to the base set_sample_rate()
+	int const samples_per_frame_;
+};
+
+// Uses a single buffer and outputs mono samples.
+class Mono_Buffer : public Multi_Buffer {
+	Blip_Buffer buf; // the one buffer behind every channel
+	channel_t chan;  // center/left/right pointers, returned by channel()
+public:
+	// Buffer used for all channels
+	Blip_Buffer* center() { return &buf; }
+	
+public:
+	Mono_Buffer();
+	~Mono_Buffer();
+	blargg_err_t set_sample_rate( long rate, int msec = blip_default_length );
+	void clock_rate( long rate ) { buf.clock_rate( rate ); }
+	void bass_freq( int freq ) { buf.bass_freq( freq ); }
+	void clear() { buf.clear(); }
+	long samples_avail() const { return buf.samples_avail(); }
+	long read_samples( blip_sample_t* p, long s ) { return buf.read_samples( p, s ); }
+	channel_t channel( int, int ) { return chan; } // index and type are ignored: same channel for every request
+	void end_frame( blip_time_t t ) { buf.end_frame( t ); }
+};
+
+// Uses three buffers (one for center) and outputs stereo sample pairs.
+class Stereo_Buffer : public Multi_Buffer {
+public:
+	
+	// Buffers used for all channels
+	Blip_Buffer* center()       { return &bufs [0]; }
+	Blip_Buffer* left()         { return &bufs [1]; }
+	Blip_Buffer* right()        { return &bufs [2]; }
+	
+public:
+	Stereo_Buffer();
+	~Stereo_Buffer();
+	blargg_err_t set_sample_rate( long, int msec = blip_default_length );
+	void clock_rate( long );
+	void bass_freq( int );
+	void clear();
+	channel_t channel( int, int ) { return chan; } // index and type are ignored: same channel for every request
+	void end_frame( blip_time_t );
+	
+	long samples_avail() const { return bufs [0].samples_avail() * 2; } // center buffer's count, doubled for left+right output
+	long read_samples( blip_sample_t*, long );
+	
+private:
+	enum { buf_count = 3 };
+	Blip_Buffer bufs [buf_count]; // [0] center, [1] left, [2] right
+	channel_t chan;
+	int stereo_added; // per-buffer modified bits collected by end_frame()
+	int was_stereo;   // stereo_added value carried over by read_samples()
+	
+	void mix_stereo_no_center( blip_sample_t*, blargg_long );
+	void mix_stereo( blip_sample_t*, blargg_long );
+	void mix_mono( blip_sample_t*, blargg_long );
+};
+
+// Silent_Buffer generates no samples, useful where no sound is wanted
+class Silent_Buffer : public Multi_Buffer {
+	channel_t chan; // all three pointers are null (set in the constructor)
+public:
+	Silent_Buffer();
+	blargg_err_t set_sample_rate( long rate, int msec = blip_default_length );
+	void clock_rate( long ) { }
+	void bass_freq( int ) { }
+	void clear() { }
+	channel_t channel( int, int ) { return chan; }
+	void end_frame( blip_time_t ) { }
+	long samples_avail() const { return 0; } // never has samples
+	long read_samples( blip_sample_t*, long ) { return 0; } // writes nothing, reports 0 samples read
+};
+
+
+inline blargg_err_t Multi_Buffer::set_sample_rate( long rate, int msec ) // base part: just records the values for sample_rate()/length()
+{
+	sample_rate_ = rate;
+	length_ = msec;
+	return 0; // always succeeds
+}
+
+inline blargg_err_t Silent_Buffer::set_sample_rate( long rate, int msec ) // no buffers to configure; only records the values
+{
+	return Multi_Buffer::set_sample_rate( rate, msec );
+}
+
+inline int Multi_Buffer::samples_per_frame() const { return samples_per_frame_; } // value given to the constructor
+
+inline long Multi_Buffer::sample_rate() const { return sample_rate_; } // 0 until the base set_sample_rate() has run
+
+inline int Multi_Buffer::length() const { return length_; } // 0 until the base set_sample_rate() has run
+
+#endif
diff --git a/libraries/game-music-emu/gme/Music_Emu.cpp b/libraries/game-music-emu/gme/Music_Emu.cpp
new file mode 100644
index 000000000..e60e7ca5d
--- /dev/null
+++ b/libraries/game-music-emu/gme/Music_Emu.cpp
@@ -0,0 +1,451 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Music_Emu.h"
+
+#include "Multi_Buffer.h"
+#include <string.h>
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+int const silence_max = 6; // seconds
+int const silence_threshold = 0x10; // count_silence() treats samples within +/- silence_threshold/2 as silent
+long const fade_block_size = 512; // handle_fade() recomputes the gain once per this many samples
+int const fade_shift = 8; // fade ends with gain at 1.0 / (1 << fade_shift)
+
+Music_Emu::equalizer_t const Music_Emu::tv_eq =
+	Music_Emu::make_equalizer( -8.0, 180 ); // treble -8.0, bass 180
+
+void Music_Emu::clear_track_vars() // puts all per-track state back to "no track started"
+{
+	current_track_   = -1; // no current track
+	out_time         = 0;
+	emu_time         = 0;
+	emu_track_ended_ = true;
+	track_ended_     = true;
+	fade_start       = INT_MAX / 2 + 1; // very large, so play()'s "out_time > fade_start" test stays false until set_fade() is called
+	fade_step        = 1;
+	silence_time     = 0;
+	silence_count    = 0;
+	buf_remain       = 0;
+	warning(); // clear warning
+}
+
+void Music_Emu::unload() // drops voice count and track state, then defers to Gme_File::unload()
+{
+	voice_count_ = 0;
+	clear_track_vars();
+	Gme_File::unload();
+}
+
+Music_Emu::Music_Emu()
+{
+	effects_buffer = 0; // deleted in the destructor, so it must start null
+	multi_channel_ = false;
+	sample_rate_ = 0; // 0 means "not set yet"; several methods require() it to be non-zero
+	mute_mask_   = 0;
+	tempo_       = 1.0;
+	gain_        = 1.0;
+	
+	// defaults
+	max_initial_silence = 2;
+	silence_lookahead   = 3;
+	ignore_silence_     = false;
+	equalizer_.treble   = -1.0;
+	equalizer_.bass     = 60;
+	
+	emu_autoload_playback_limit_ = true;
+
+	static const char* const names [] = { // generic voice names installed as the default
+		"Voice 1", "Voice 2", "Voice 3", "Voice 4",
+		"Voice 5", "Voice 6", "Voice 7", "Voice 8"
+	};
+	set_voice_names( names );
+	Music_Emu::unload(); // non-virtual
+}
+
+Music_Emu::~Music_Emu() { delete effects_buffer; } // effects_buffer is null unless something assigned it
+
+blargg_err_t Music_Emu::set_sample_rate( long rate )
+{
+	require( !sample_rate() ); // sample rate can't be changed once set
+	RETURN_ERR( set_sample_rate_( rate ) ); // emulator-specific setup
+	RETURN_ERR( buf.resize( buf_size ) ); // allocate the silence-detection buffer
+	sample_rate_ = rate; // recorded last, after both steps above
+	return 0;
+}
+
+void Music_Emu::pre_load() // checks the sample rate precondition, then defers to Gme_File::pre_load()
+{
+	require( sample_rate() ); // set_sample_rate() must be called before loading a file
+	Gme_File::pre_load();
+}
+
+void Music_Emu::set_equalizer( equalizer_t const& eq )
+{
+	equalizer_ = eq; // remembered for equalizer()
+	set_equalizer_( eq ); // emulator-specific hook
+}
+
+bool Music_Emu::multi_channel() const
+{
+	return multi_channel_; // false by default; changed only by set_multi_channel_()
+}
+
+blargg_err_t Music_Emu::set_multi_channel( bool ) // argument ignored in the base class
+{
+	// by default not supported, derived may override this
+	return "unsupported for this emulator type";
+}
+
+blargg_err_t Music_Emu::set_multi_channel_( bool isEnabled ) // helper that actually stores the flag
+{
+	// multi channel support must be set at the very beginning
+	require( !sample_rate() ); // i.e. before set_sample_rate() has succeeded
+	multi_channel_ = isEnabled;
+	return 0;
+}
+
+void Music_Emu::mute_voice( int index, bool mute )
+{
+	require( (unsigned) index < (unsigned) voice_count() );
+	
+	// set this voice's bit to mute it, clear it to unmute; other voices keep their state
+	int const voice_bit = 1 << index;
+	int const new_mask  = mute ? (mute_mask_ | voice_bit) : (mute_mask_ & ~voice_bit);
+	mute_voices( new_mask );
+}
+
+void Music_Emu::mute_voices( int mask )
+{
+	require( sample_rate() ); // sample rate must be set first
+	mute_mask_ = mask; // remembered so mute_voice()/remute_voices() can reuse it
+	mute_voices_( mask ); // emulator-specific hook
+}
+
+void Music_Emu::set_tempo( double t )
+{
+	require( sample_rate() ); // sample rate must be set first
+	
+	// keep the requested tempo within the supported 0.02x .. 4x range
+	double const slowest = 0.02;
+	double const fastest = 4.00;
+	tempo_ = (t < slowest) ? slowest : (t > fastest) ? fastest : t;
+	set_tempo_( tempo_ );
+}
+
+void Music_Emu::post_load_() // re-applies the stored tempo and mute mask
+{
+	set_tempo( tempo_ );
+	remute_voices();
+}
+
+blargg_err_t Music_Emu::start_track( int track )
+{
+	clear_track_vars();
+	
+	int remapped = track;
+	RETURN_ERR( remap_track_( &remapped ) );
+	current_track_ = track; // stores the caller's index, not the remapped one
+	RETURN_ERR( start_track_( remapped ) );
+	
+	emu_track_ended_ = false;
+	track_ended_     = false;
+	
+	if ( !ignore_silence_ )
+	{
+		// play until non-silence or end of track
+		for ( long end = max_initial_silence * out_channels() * sample_rate(); emu_time < end; ) // end = max_initial_silence seconds' worth of samples
+		{
+			fill_buf();
+			if ( buf_remain | (int) emu_track_ended_ )
+				break;
+		}
+		
+		emu_time      = buf_remain; // restart the clocks: only the buffered samples count as already emulated
+		out_time      = 0;
+		silence_time  = 0;
+		silence_count = 0;
+	}
+	return track_ended() ? warning() : 0; // if the track ended already, hand back the warning text instead of 0
+}
+
+void Music_Emu::end_track_if_error( blargg_err_t err )
+{
+	if ( !err )
+		return; // no error: track state is left alone
+	
+	emu_track_ended_ = true;
+	set_warning( err );
+}
+
+bool Music_Emu::autoload_playback_limit() const // true by default (set in the constructor)
+{
+	return emu_autoload_playback_limit_;
+}
+
+void Music_Emu::set_autoload_playback_limit( bool do_autoload_limit ) // only stores the flag; nothing else changes here
+{
+	emu_autoload_playback_limit_ = do_autoload_limit;
+}
+
+// Tell/Seek
+
+blargg_long Music_Emu::msec_to_samples( blargg_long msec ) const // milliseconds -> sample count across all output channels
+{
+	blargg_long sec = msec / 1000;
+	msec -= sec * 1000; // msec now holds only the sub-second remainder
+	return (sec * sample_rate() + msec * sample_rate() / 1000) * out_channels(); // whole seconds and remainder are converted separately
+}
+
+long Music_Emu::tell_samples() const // samples output so far in the current track
+{
+	return out_time;
+}
+
+long Music_Emu::tell() const // out_time converted to milliseconds
+{
+	blargg_long rate = sample_rate() * out_channels(); // samples per second across all channels; 0 if the sample rate is unset
+	blargg_long sec = out_time / rate;
+	return sec * 1000 + (out_time - sec * rate) * 1000 / rate; // whole seconds plus the sub-second remainder
+}
+
+blargg_err_t Music_Emu::seek_samples( long time )
+{
+	if ( time < out_time ) // seeking backwards: restart the track and skip forward from 0
+		RETURN_ERR( start_track( current_track_ ) );
+	return skip( time - out_time );
+}
+
+blargg_err_t Music_Emu::seek( long msec ) // millisecond front end for seek_samples()
+{
+	return seek_samples( msec_to_samples( msec ) );
+}
+
+blargg_err_t Music_Emu::skip( long count )
+{
+	require( current_track() >= 0 ); // start_track() must have been called already
+	out_time += count;
+	
+	// remove from silence and buf first
+	{
+		long n = min( count, silence_count );
+		silence_count -= n;
+		count -= n;
+		
+		n = min( count, buf_remain );
+		buf_remain -= n;
+		count -= n;
+	}
+		
+	if ( count && !emu_track_ended_ ) // whatever is left must be skipped inside the emulator
+	{
+		emu_time += count;
+		end_track_if_error( skip_( count ) );
+	}
+	
+	if ( !(silence_count | buf_remain) ) // caught up to emulator, so update track ended
+		track_ended_ |= emu_track_ended_;
+	
+	return 0; // always 0; a skip_() error ends the track and becomes the warning instead
+}
+
+blargg_err_t Music_Emu::skip_( long count ) // default skip: plays into the scratch buffer and discards the output
+{
+	// for long skip, mute sound
+	const long threshold = 30000;
+	if ( count > threshold )
+	{
+		int saved_mute = mute_mask_;
+		mute_voices( ~0 );
+		
+		while ( count > threshold / 2 && !emu_track_ended_ ) // the last threshold/2 samples are played below with muting restored
+		{
+			RETURN_ERR( play_( buf_size, buf.begin() ) ); // NOTE(review): an early return here leaves all voices muted
+			count -= buf_size;
+		}
+		
+		mute_voices( saved_mute );
+	}
+	
+	while ( count && !emu_track_ended_ )
+	{
+		long n = buf_size;
+		if ( n > count )
+			n = count;
+		count -= n;
+		RETURN_ERR( play_( n, buf.begin() ) );
+	}
+	return 0;
+}
+
+// Fading
+void Music_Emu::set_fade( long start_msec, long length_msec )
+{
+	fade_step = sample_rate() * length_msec / (fade_block_size * fade_shift * 1000 / out_channels());
+	if ( fade_step < 1 ) fade_step = 1; // short or non-positive lengths gave 0 or less; int_log() divides by fade_step
+	fade_start = msec_to_samples( start_msec );
+}
+
+// Approximates unit / pow( 2.0, (double) x / step ): whole powers by shifting, linear in between
+static int int_log( blargg_long x, int step, int unit )
+{
+	int shift = x / step; // number of whole halvings
+	int fraction = (x - shift * step) * unit / step; // position within the current halving, scaled to 0..unit
+	return ((unit - fraction) + (fraction >> 1)) >> shift; // unit - fraction/2 runs from unit down toward unit/2
+}
+
+void Music_Emu::handle_fade( long out_count, sample_t* out ) // scales out[] in place, one gain value per fade_block_size samples
+{
+	for ( int i = 0; i < out_count; i += fade_block_size )
+	{
+		int const shift = 14;
+		int const unit = 1 << shift; // fixed-point 1.0
+		int gain = int_log( (out_time + i - fade_start) / fade_block_size,
+				fade_step, unit );
+		if ( gain < (unit >> fade_shift) ) // gain fell below 1.0 / (1 << fade_shift): the fade is over
+			track_ended_ = emu_track_ended_ = true;
+		
+		sample_t* io = &out [i];
+		for ( int count = min( fade_block_size, out_count - i ); count; --count ) // the last block may be shorter
+		{
+			*io = sample_t ((*io * gain) >> shift);
+			++io;
+		}
+	}
+}
+
+// Silence detection
+
+void Music_Emu::emu_play( long count, sample_t* out ) // runs the emulator for count samples, or writes zeros if it cannot run
+{
+	check( current_track_ >= 0 );
+	emu_time += count; // advanced even when only zeros are produced
+	if ( current_track_ >= 0 && !emu_track_ended_ )
+		end_track_if_error( play_( count, out ) );
+	else
+		memset( out, 0, count * sizeof *out );
+}
+
+// number of consecutive silent samples at end (returns size minus the index where the backward scan stopped)
+static long count_silence( Music_Emu::sample_t* begin, long size )
+{
+	Music_Emu::sample_t first = *begin;
+	*begin = silence_threshold; // sentinel
+	Music_Emu::sample_t* p = begin + size;
+	while ( (unsigned) (*--p + silence_threshold / 2) <= (unsigned) silence_threshold ) { } // scan back while the sample is within +/- silence_threshold/2
+	*begin = first; // restore the sample the sentinel replaced
+	return size - (p - begin); // equals size when the scan ran all the way back to the sentinel
+}
+
+// fill internal buffer and check it for silence
+void Music_Emu::fill_buf()
+{
+	assert( !buf_remain ); // the previous buffer contents must already be consumed
+	if ( !emu_track_ended_ )
+	{
+		emu_play( buf_size, buf.begin() );
+		long silence = count_silence( buf.begin(), buf_size );
+		if ( silence < buf_size ) // the buffer contains sound
+		{
+			silence_time = emu_time - silence; // sample position where the trailing silence starts
+			buf_remain   = buf_size; // keep the whole buffer for play() to copy out
+			return;
+		}
+	}
+	silence_count += buf_size; // buffer counted as silent (or track ended): owe the listener that much silence
+}
+
+blargg_err_t Music_Emu::play( long out_count, sample_t* out )
+{
+	if ( track_ended_ ) // after the end of the track, output is all zeros
+	{
+		memset( out, 0, out_count * sizeof *out );
+	}
+	else
+	{
+		require( current_track() >= 0 );
+		require( out_count % out_channels() == 0 );
+		
+		assert( emu_time >= out_time );
+		
+		// prints nifty graph of how far ahead we are when searching for silence
+		//debug_printf( "%*s \n", int ((emu_time - out_time) * 7 / sample_rate()), "*" );
+		
+		long pos = 0; // number of samples of out[] filled so far
+		if ( silence_count )
+		{
+			// during a run of silence, run emulator at >=2x speed so it gets ahead
+			long ahead_time = silence_lookahead * (out_time + out_count - silence_time) + silence_time;
+			while ( emu_time < ahead_time && !(buf_remain | emu_track_ended_) )
+				fill_buf();
+			
+			// fill with silence
+			pos = min( silence_count, out_count );
+			memset( out, 0, pos * sizeof *out );
+			silence_count -= pos;
+			
+			if ( emu_time - silence_time > silence_max * out_channels() * sample_rate() ) // more than silence_max seconds of silence: end the track
+			{
+				track_ended_  = emu_track_ended_ = true;
+				silence_count = 0;
+				buf_remain    = 0;
+			}
+		}
+		
+		if ( buf_remain )
+		{
+			// empty silence buf
+			long n = min( buf_remain, out_count - pos );
+			memcpy( &out [pos], buf.begin() + (buf_size - buf_remain), n * sizeof *out ); // unread samples sit at the end of buf
+			buf_remain -= n;
+			pos += n;
+		}
+		
+		// generate remaining samples normally
+		long remain = out_count - pos;
+		if ( remain )
+		{
+			emu_play( remain, out + pos );
+			track_ended_ |= emu_track_ended_;
+			
+			if ( !ignore_silence_ || out_time > fade_start )
+			{
+				// check end for a new run of silence
+				long silence = count_silence( out + pos, remain );
+				if ( silence < remain )
+					silence_time = emu_time - silence;
+				
+				if ( emu_time - silence_time >= buf_size )
+					fill_buf(); // cause silence detection on next play()
+			}
+		}
+		
+		if ( fade_start >= 0 && out_time > fade_start )
+			handle_fade( out_count, out );
+	}
+	out_time += out_count; // advanced on both paths, including after the track has ended
+	return 0;
+}
+
+// Gme_Info_
+
+blargg_err_t Gme_Info_::set_sample_rate_( long )            { return 0; } // rate is ignored
+void         Gme_Info_::pre_load()                          { Gme_File::pre_load(); } // skip Music_Emu
+void         Gme_Info_::post_load_()                        { Gme_File::post_load_(); } // skip Music_Emu
+void         Gme_Info_::set_equalizer_( equalizer_t const& ){ check( false ); } // presumably flags a call that should never happen
+void         Gme_Info_::enable_accuracy_( bool )            { check( false ); } // same
+void         Gme_Info_::mute_voices_( int )                 { check( false ); } // same
+void         Gme_Info_::set_tempo_( double )                { } // no-op
+blargg_err_t Gme_Info_::start_track_( int )                 { return "Use full emulator for playback"; }
+blargg_err_t Gme_Info_::play_( long, sample_t* )            { return "Use full emulator for playback"; }
diff --git a/libraries/game-music-emu/gme/Music_Emu.h b/libraries/game-music-emu/gme/Music_Emu.h
new file mode 100644
index 000000000..3aafa5ec1
--- /dev/null
+++ b/libraries/game-music-emu/gme/Music_Emu.h
@@ -0,0 +1,252 @@
+// Common interface to game music file emulators
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef MUSIC_EMU_H
+#define MUSIC_EMU_H
+
+#include "Gme_File.h"
+class Multi_Buffer;
+
+struct Music_Emu : public Gme_File {
+public:
+// Basic functionality (see Gme_File.h for file loading/track info functions)
+
+	// Set output sample rate. Must be called only once before loading file.
+	blargg_err_t set_sample_rate( long sample_rate );
+
+	// specifies if all 8 voices get rendered to their own stereo channel
+	// default implementation of Music_Emu always returns not supported error (i.e. no multichannel support by default)
+	// derived emus must override this if they support multichannel rendering
+	virtual blargg_err_t set_multi_channel( bool is_enabled );
+	
+	// Start a track, where 0 is the first track. Also clears warning string.
+	blargg_err_t start_track( int );
+	
+	// Generate 'count' samples into 'buf'. Output is in stereo. Any emulation
+	// errors set warning string, and major errors also end track.
+	typedef short sample_t;
+	blargg_err_t play( long count, sample_t* buf );
+	
+// Informational
+	
+	// Sample rate sound is generated at
+	long sample_rate() const;
+	
+	// Index of current track or -1 if one hasn't been started
+	int current_track() const;
+	
+	// Number of voices used by currently loaded file
+	int voice_count() const;
+	
+	// Names of voices
+	const char** voice_names() const;
+
+	bool multi_channel() const; // true if multi-channel output has been enabled
+	
+// Track status/control
+
+	// Number of milliseconds (1000 msec = 1 second) played since beginning of track
+	long tell() const;
+	
+	// Number of samples generated since beginning of track
+	long tell_samples() const;
+
+	// Seek to new time in track. Seeking backwards or far forward can take a while.
+	blargg_err_t seek( long msec );
+	
+	// Equivalent to restarting track then skipping n samples
+	blargg_err_t seek_samples( long n );
+	
+	// Skip n samples
+	blargg_err_t skip( long n );
+	
+	// True if a track has reached its end
+	bool track_ended() const;
+	
+	// Set start time and length of track fade out. Once fade ends track_ended() returns
+	// true. Fade time can be changed while track is playing.
+	void set_fade( long start_msec, long length_msec = 8000 );
+	
+	// Controls whether or not to automatically load and obey track length
+	// metadata for supported emulators.
+	//
+	// @since 0.6.2.
+	bool autoload_playback_limit() const;
+	void set_autoload_playback_limit( bool do_autoload_limit );
+
+	// Disable automatic end-of-track detection and skipping of silence at beginning
+	void ignore_silence( bool disable = true );
+	
+	// Info for current track
+	using Gme_File::track_info;
+	blargg_err_t track_info( track_info_t* out ) const;
+	
+// Sound customization
+	
+	// Adjust song tempo, where 1.0 = normal, 0.5 = half speed, 2.0 = double speed.
+	// Track length as returned by track_info() assumes a tempo of 1.0.
+	void set_tempo( double );
+	
+	// Mute/unmute voice i, where voice 0 is first voice
+	void mute_voice( int index, bool mute = true );
+	
+	// Set muting state of all voices at once using a bit mask, where -1 mutes them all,
+	// 0 unmutes them all, 0x01 mutes just the first voice, etc.
+	void mute_voices( int mask );
+	
+	// Change overall output amplitude, where 1.0 results in minimal clamping.
+	// Must be called before set_sample_rate().
+	void set_gain( double );
+	
+	// Request use of custom multichannel buffer. Only supported by "classic" emulators;
+	// on others this has no effect. Should be called only once *before* set_sample_rate().
+	virtual void set_buffer( Multi_Buffer* ) { }
+	
+	// Enables/disables accurate emulation options, if any are supported. Might change
+	// equalizer settings.
+	void enable_accuracy( bool enable = true );
+	
+// Sound equalization (treble/bass)
+
+	// Frequency equalizer parameters (see gme.txt)
+	// See gme.h for definition of struct gme_equalizer_t.
+	typedef gme_equalizer_t equalizer_t;
+	
+	// Current frequency equalizer parameters
+	equalizer_t const& equalizer() const;
+	
+	// Set frequency equalizer parameters
+	void set_equalizer( equalizer_t const& );
+
+	// Construct equalizer of given treble/bass settings
+	static const equalizer_t make_equalizer( double treble, double bass )
+	{
+	    const Music_Emu::equalizer_t e = { treble, bass,
+		0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+	    return e;
+	}
+	
+	// Equalizer settings for TV speaker
+	static equalizer_t const tv_eq;
+	
+public:
+	Music_Emu();
+	~Music_Emu();
+protected:
+	void set_max_initial_silence( int n )       { max_initial_silence = n; }
+	void set_silence_lookahead( int n )         { silence_lookahead = n; }
+	void set_voice_count( int n )               { voice_count_ = n; }
+	void set_voice_names( const char* const* names );
+	void set_track_ended()                      { emu_track_ended_ = true; }
+	double gain() const                         { return gain_; }
+	double tempo() const                        { return tempo_; }
+	void remute_voices();
+	blargg_err_t set_multi_channel_( bool is_enabled );
+	
+	virtual blargg_err_t set_sample_rate_( long sample_rate ) = 0;
+	virtual void set_equalizer_( equalizer_t const& ) { }
+	virtual void enable_accuracy_( bool /* enable */ ) { }
+	virtual void mute_voices_( int mask ) = 0;
+	virtual void set_tempo_( double ) = 0;
+	virtual blargg_err_t start_track_( int ) = 0; // tempo is set before this
+	virtual blargg_err_t play_( long count, sample_t* out ) = 0;
+	virtual blargg_err_t skip_( long count );
+protected:
+	virtual void unload();
+	virtual void pre_load();
+	virtual void post_load_();
+private:
+	// general
+	equalizer_t equalizer_;
+	int max_initial_silence;
+	const char** voice_names_;
+	int voice_count_;
+	int mute_mask_;
+	double tempo_;
+	double gain_;
+	bool multi_channel_;
+
+	// returns the number of output channels, i.e. usually 2 for stereo, unless multi_channel_ == true
+	int out_channels() const { return this->multi_channel() ? 2*8 : 2; }
+
+	long sample_rate_;
+	blargg_long msec_to_samples( blargg_long msec ) const;
+	
+	// track-specific
+	int current_track_;
+	blargg_long out_time;  // number of samples played since start of track
+	blargg_long emu_time;  // number of samples emulator has generated since start of track
+	bool emu_track_ended_; // emulator has reached end of track
+	bool emu_autoload_playback_limit_; // whether to load and obey track length by default
+	volatile bool track_ended_;
+	void clear_track_vars();
+	void end_track_if_error( blargg_err_t );
+	
+	// fading
+	blargg_long fade_start;
+	int fade_step;
+	void handle_fade( long count, sample_t* out );
+	
+	// silence detection
+	int silence_lookahead; // speed to run emulator when looking ahead for silence
+	bool ignore_silence_;
+	long silence_time;     // number of samples where most recent silence began
+	long silence_count;    // number of samples of silence to play before using buf
+	long buf_remain;       // number of samples left in silence buffer
+	enum { buf_size = 2048 };
+	blargg_vector<sample_t> buf;
+	void fill_buf();
+	void emu_play( long count, sample_t* out );
+	
+	Multi_Buffer* effects_buffer;
+	friend Music_Emu* gme_internal_new_emu_( gme_type_t, int, bool );
+	friend void gme_set_stereo_depth( Music_Emu*, double );
+};
+
+// base class for info-only derivations
+struct Gme_Info_ : Music_Emu
+{
+	virtual blargg_err_t set_sample_rate_( long sample_rate );
+	virtual void set_equalizer_( equalizer_t const& );
+	virtual void enable_accuracy_( bool );
+	virtual void mute_voices_( int mask );
+	virtual void set_tempo_( double );
+	virtual blargg_err_t start_track_( int ); // playback is refused: returns an error string
+	virtual blargg_err_t play_( long count, sample_t* out ); // playback is refused: returns an error string
+	virtual void pre_load(); // goes straight to Gme_File, skipping Music_Emu's version
+	virtual void post_load_(); // goes straight to Gme_File, skipping Music_Emu's version
+};
+
+inline blargg_err_t Music_Emu::track_info( track_info_t* out ) const // info for current_track_, via the two-argument overload
+{
+	return track_info( out, current_track_ );
+}
+
+inline long Music_Emu::sample_rate() const          { return sample_rate_; } // 0 until set_sample_rate() succeeds
+inline const char** Music_Emu::voice_names() const  { return voice_names_; }
+inline int Music_Emu::voice_count() const           { return voice_count_; }
+inline int Music_Emu::current_track() const         { return current_track_; } // -1 when no track is started
+inline bool Music_Emu::track_ended() const          { return track_ended_; }
+inline const Music_Emu::equalizer_t& Music_Emu::equalizer() const { return equalizer_; }
+
+inline void Music_Emu::enable_accuracy( bool b )    { enable_accuracy_( b ); }
+inline void Music_Emu::set_tempo_( double t )       { tempo_ = t; } // base body for the pure virtual: just stores the tempo
+inline void Music_Emu::remute_voices()              { mute_voices( mute_mask_ ); } // re-applies the stored mask
+inline void Music_Emu::ignore_silence( bool b )     { ignore_silence_ = b; }
+inline blargg_err_t Music_Emu::start_track_( int )  { return 0; } // base body for the pure virtual: does nothing, reports success
+
+inline void Music_Emu::set_voice_names( const char* const* names ) // stores the pointer only; the array itself is not copied
+{
+	// Intentional removal of const, so users don't have to remember obscure const in middle
+	voice_names_ = const_cast<const char**> (names);
+}
+
+inline void Music_Emu::mute_voices_( int ) { } // base body for the pure virtual: does nothing
+
+inline void Music_Emu::set_gain( double g ) // only stores the value
+{
+	assert( !sample_rate() ); // you must set gain before setting sample rate
+	gain_ = g;
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Nes_Apu.cpp b/libraries/game-music-emu/gme/Nes_Apu.cpp
new file mode 100644
index 000000000..68edb446d
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nes_Apu.cpp
@@ -0,0 +1,391 @@
+// Nes_Snd_Emu 0.1.8. http://www.slack.net/~ant/
+
+#include "Nes_Apu.h"
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+int const amp_range = 15; // divisor used when scaling the square, triangle and noise synth volumes
+
+Nes_Apu::Nes_Apu() :
+	square1( &square_synth ), // both squares are handed the same synth
+	square2( &square_synth )
+{
+	tempo_ = 1.0;
+	dmc.apu = this;
+	dmc.prg_reader = NULL;
+	irq_notifier_ = NULL;
+	// table order is the osc_output() index: square 1, square 2, triangle, noise, DMC
+	oscs [0] = &square1;
+	oscs [1] = &square2;
+	oscs [2] = &triangle;
+	oscs [3] = &noise;
+	oscs [4] = &dmc;
+	// begin with no output buffer, volume 1.0, and pal_mode off
+	output( NULL );
+	volume( 1.0 );
+	reset( false );
+}
+
+void Nes_Apu::treble_eq( const blip_eq_t& eq ) // applies the same eq to every synth the APU owns
+{
+	square_synth.treble_eq( eq ); // one synth covers both squares
+	triangle.synth.treble_eq( eq );
+	noise.synth.treble_eq( eq );
+	dmc.synth.treble_eq( eq );
+}
+
+void Nes_Apu::enable_nonlinear( double v ) // switches to nonlinear mode and rescales all synth volumes
+{
+	dmc.nonlinear = true;
+	square_synth.volume( 1.3 * 0.25751258 / 0.742467605 * 0.25 / amp_range * v );
+	// triangle, noise and DMC share one base unit, weighted 3 : 2 : 1 (v is not applied to these)
+	const double tnd = 0.48 / 202 * nonlinear_tnd_gain();
+	triangle.synth.volume( 3.0 * tnd );
+	noise.synth.volume( 2.0 * tnd );
+	dmc.synth.volume( tnd );
+	// forget the last output level of all five oscillators
+	square1 .last_amp = 0;
+	square2 .last_amp = 0;
+	triangle.last_amp = 0;
+	noise   .last_amp = 0;
+	dmc     .last_amp = 0;
+}
+
+void Nes_Apu::volume( double v ) // sets per-synth volumes for the default (linear) mode
+{
+	dmc.nonlinear = false; // undoes the flag set by enable_nonlinear()
+	square_synth.volume(   0.1128  / amp_range * v );
+	triangle.synth.volume( 0.12765 / amp_range * v );
+	noise.synth.volume(    0.0741  / amp_range * v );
+	dmc.synth.volume(      0.42545 / 127 * v ); // DMC is scaled by 127 instead of amp_range
+}
+
+void Nes_Apu::output( Blip_Buffer* buffer )
+{
+	// route every oscillator, index 0 up to osc_count - 1, to the same buffer
+	for ( int osc = 0; osc != osc_count; ++osc ) osc_output( osc, buffer );
+}
+
+void Nes_Apu::set_tempo( double t )
+{
+	tempo_ = t;
+	int const base_period = dmc.pal_mode ? 8314 : 7458;
+	// a scaled period gets its low bit cleared because it must be even
+	frame_period = (t == 1.0) ? base_period : ((int) (base_period / t) & ~1);
+}
+
+void Nes_Apu::reset( bool pal_mode, int initial_dmc_dac )
+{
+	dmc.pal_mode = pal_mode;
+	set_tempo( tempo_ ); // recomputes frame_period, which depends on pal_mode
+	
+	square1.reset();
+	square2.reset();
+	triangle.reset();
+	noise.reset();
+	dmc.reset();
+	
+	last_time = 0;
+	last_dmc_time = 0;
+	osc_enables = 0;
+	irq_flag = false;
+	earliest_irq_ = no_irq;
+	frame_delay = 1;
+	write_register( 0, 0x4017, 0x00 ); // frame mode 0 with the frame IRQ enabled
+	write_register( 0, 0x4015, 0x00 ); // every channel disabled
+	// write 0x10 to the first register of each oscillator and 0x00 to its other three
+	for ( nes_addr_t addr = start_addr; addr <= 0x4013; addr++ )
+		write_register( 0, addr, (addr & 3) ? 0x00 : 0x10 );
+	
+	dmc.dac = initial_dmc_dac;
+	if ( !dmc.nonlinear )
+		triangle.last_amp = 15;
+	if ( !dmc.nonlinear ) // TODO: remove?
+		dmc.last_amp = initial_dmc_dac; // prevent output transition
+}
+
+void Nes_Apu::irq_changed()
+{
+	// A raised flag means an IRQ is pending right now (time 0). Otherwise
+	// the earliest IRQ is whichever of the DMC and frame times comes first.
+	nes_time_t soonest = 0;
+	if ( !(dmc.irq_flag | irq_flag) )
+		soonest = (next_irq < dmc.next_irq) ? next_irq : dmc.next_irq;
+	
+	if ( soonest == earliest_irq_ )
+		return; // nothing changed, so no notification is sent
+	
+	// record the new time, then invoke the callback if one is installed
+	earliest_irq_ = soonest;
+	if ( irq_notifier_ )
+		irq_notifier_( irq_data );
+}
+
+// frames
+
+void Nes_Apu::run_until( nes_time_t end_time )
+{
+	require( end_time >= last_dmc_time );
+	if ( end_time <= next_dmc_read_time() )
+		return; // the next DMC read is not before end_time, so leave the DMC alone
+	
+	nes_time_t const from = last_dmc_time;
+	last_dmc_time = end_time; // updated before dmc.run(), as in the other caller
+	dmc.run( from, end_time );
+}
+
+void Nes_Apu::run_until_( nes_time_t end_time ) // runs the DMC, the other oscillators and the frame counter up to end_time
+{
+	require( end_time >= last_time );
+	
+	if ( end_time == last_time )
+		return;
+	// catch the DMC up first; it may already be ahead because run_until() advances it separately
+	if ( last_dmc_time < end_time )
+	{
+		nes_time_t start = last_dmc_time;
+		last_dmc_time = end_time;
+		dmc.run( start, end_time );
+	}
+	
+	while ( true )
+	{
+		// earlier of next frame time or end time
+		nes_time_t time = last_time + frame_delay;
+		if ( time > end_time )
+			time = end_time;
+		frame_delay -= time - last_time; // cycles still left before the next frame step
+		
+		// run oscs to present
+		square1.run( last_time, time );
+		square2.run( last_time, time );
+		triangle.run( last_time, time );
+		noise.run( last_time, time );
+		last_time = time;
+		
+		if ( time == end_time )
+			break; // no more frames to run
+		
+		// take frame-specific actions
+		frame_delay = frame_period;
+		switch ( frame++ ) // case bodies see the already-incremented value of frame
+		{
+			case 0:
+				if ( !(frame_mode & 0xC0) ) {
+		 			next_irq = time + frame_period * 4 + 2;
+		 			irq_flag = true;
+		 		}
+		 		// fall through
+		 	case 2:
+		 		// clock length and sweep on frames 0 and 2
+				square1.clock_length( 0x20 );
+				square2.clock_length( 0x20 );
+				noise.clock_length( 0x20 );
+				triangle.clock_length( 0x80 ); // different bit for halt flag on triangle
+				
+				square1.clock_sweep( -1 );
+				square2.clock_sweep( 0 );
+				
+				// frame 2 is slightly shorter when pal_mode is set (frame == 3 here because of the post-increment)
+				if ( dmc.pal_mode && frame == 3 )
+					frame_delay -= 2;
+		 		break;
+		 	
+			case 1:
+				// frame 1 is slightly shorter when pal_mode is clear
+				if ( !dmc.pal_mode )
+					frame_delay -= 2;
+				break;
+			
+		 	case 3:
+		 		frame = 0;
+		 		
+		 		// frame 3 is almost twice as long in mode 1
+		 		if ( frame_mode & 0x80 )
+					frame_delay += frame_period - (dmc.pal_mode ? 2 : 6);
+				break;
+		}
+		
+		// clock envelopes and linear counter every frame
+		triangle.clock_linear_counter();
+		square1.clock_envelope();
+		square2.clock_envelope();
+		noise.clock_envelope();
+	}
+}
+
+template<class T>
+inline void zero_apu_osc( T* osc, nes_time_t time )
+{
+	// forget the last output level; if it was non-zero and a buffer is attached, emit the opposite step at 'time'
+	int const prev_amp = osc->last_amp;
+	osc->last_amp = 0;
+	if ( prev_amp && osc->output )
+		osc->synth.offset( time, -prev_amp, osc->output );
+}
+
+void Nes_Apu::end_frame( nes_time_t end_time ) // runs to end_time, then rebases all stored times so the next frame starts at 0
+{
+	if ( end_time > last_time )
+		run_until_( end_time );
+	// in nonlinear mode every oscillator's remembered output level is cleared at frame end
+	if ( dmc.nonlinear )
+	{
+		zero_apu_osc( &square1,  last_time );
+		zero_apu_osc( &square2,  last_time );
+		zero_apu_osc( &triangle, last_time );
+		zero_apu_osc( &noise,    last_time );
+		zero_apu_osc( &dmc,      last_time );
+	}
+	
+	// make times relative to new frame
+	last_time -= end_time;
+	require( last_time >= 0 );
+	
+	last_dmc_time -= end_time;
+	require( last_dmc_time >= 0 );
+	// the no_irq sentinel is left untouched; only real times are shifted
+	if ( next_irq != no_irq ) {
+		next_irq -= end_time;
+		check( next_irq >= 0 );
+	}
+	if ( dmc.next_irq != no_irq ) {
+		dmc.next_irq -= end_time;
+		check( dmc.next_irq >= 0 );
+	}
+	if ( earliest_irq_ != no_irq ) {
+		earliest_irq_ -= end_time;
+		if ( earliest_irq_ < 0 )
+			earliest_irq_ = 0; // already due: clamp to 0, the value of irq_waiting
+	}
+}
+
+// registers
+
+static const unsigned char length_table [0x20] = { // length counter values, indexed by bits 3-7 of a write to an oscillator's register 3
+	0x0A, 0xFE, 0x14, 0x02, 0x28, 0x04, 0x50, 0x06,
+	0xA0, 0x08, 0x3C, 0x0A, 0x0E, 0x0C, 0x1A, 0x0E, 
+	0x0C, 0x10, 0x18, 0x12, 0x30, 0x14, 0x60, 0x16,
+	0xC0, 0x18, 0x48, 0x1A, 0x10, 0x1C, 0x20, 0x1E
+};
+
+void Nes_Apu::write_register( nes_time_t time, nes_addr_t addr, int data ) // emulates a CPU write of 'data' to APU register 'addr' at 'time'
+{
+	require( addr > 0x20 ); // addr must be actual address (i.e. 0x40xx)
+	require( (unsigned) data <= 0xFF );
+	
+	// Ignore addresses outside range
+	if ( unsigned (addr - start_addr) > end_addr - start_addr )
+		return;
+	
+	run_until_( time ); // bring emulation up to the write time before any state changes
+	
+	if ( addr < 0x4014 )
+	{
+		// Write to channel
+		int osc_index = (addr - start_addr) >> 2; // four registers per oscillator
+		Nes_Osc* osc = oscs [osc_index];
+		
+		int reg = addr & 3;
+		osc->regs [reg] = data;
+		osc->reg_written [reg] = true;
+		
+		if ( osc_index == 4 )
+		{
+			// handle DMC specially
+			dmc.write_register( reg, data );
+		}
+		else if ( reg == 3 )
+		{
+			// load length counter
+			if ( (osc_enables >> osc_index) & 1 )
+				osc->length_counter = length_table [(data >> 3) & 0x1F];
+			
+			// reset square phase
+			if ( osc_index < 2 )
+				((Nes_Square*) osc)->phase = Nes_Square::phase_range - 1;
+		}
+	}
+	else if ( addr == 0x4015 )
+	{
+		// Channel enables
+		for ( int i = osc_count; i--; )
+			if ( !((data >> i) & 1) )
+				oscs [i]->length_counter = 0;
+		// this write clears the DMC IRQ flag; recalculate only if it had been set
+		bool recalc_irq = dmc.irq_flag;
+		dmc.irq_flag = false;
+		
+		int old_enables = osc_enables;
+		osc_enables = data;
+		if ( !(data & 0x10) ) {
+			dmc.next_irq = no_irq;
+			recalc_irq = true;
+		}
+		else if ( !(old_enables & 0x10) ) {
+			dmc.start(); // dmc just enabled
+		}
+		
+		if ( recalc_irq )
+			irq_changed();
+	}
+	else if ( addr == 0x4017 )
+	{
+		// Frame mode
+		frame_mode = data;
+		
+		bool irq_enabled = !(data & 0x40);
+		irq_flag &= irq_enabled; // disabling the frame IRQ also drops a flagged one
+		next_irq = no_irq;
+		
+		// mode 1
+		frame_delay = (frame_delay & 1); // only the low bit of the pending delay is kept
+		frame = 0;
+		
+		if ( !(data & 0x80) )
+		{
+			// mode 0
+			frame = 1;
+			frame_delay += frame_period;
+			if ( irq_enabled )
+				next_irq = time + frame_delay + frame_period * 3 + 1;
+		}
+		
+		irq_changed();
+	}
+}
+
+int Nes_Apu::read_status( nes_time_t time ) // returns the status byte: bit 7 DMC IRQ, bit 6 frame IRQ, bits 0-4 non-zero length counters
+{
+	run_until_( time - 1 ); // flags and length counters are sampled one cycle before the read time
+	
+	int result = (dmc.irq_flag << 7) | (irq_flag << 6);
+	
+	for ( int i = 0; i < osc_count; i++ )
+		if ( oscs [i]->length_counter )
+			result |= 1 << i;
+	
+	run_until_( time );
+	// a frame IRQ flagged by the final cycle is still reported; reading clears it (the DMC flag is not cleared here)
+	if ( irq_flag )
+	{
+		result |= 0x40;
+		irq_flag = false;
+		irq_changed();
+	}
+	
+	//debug_printf( "%6d/%d Read $4015->$%02X\n", frame_delay, frame, result );
+	
+	return result;
+}
diff --git a/libraries/game-music-emu/gme/Nes_Apu.h b/libraries/game-music-emu/gme/Nes_Apu.h
new file mode 100644
index 000000000..5e722248f
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nes_Apu.h
@@ -0,0 +1,179 @@
+// NES 2A03 APU sound chip emulator
+
+// Nes_Snd_Emu 0.1.8
+#ifndef NES_APU_H
+#define NES_APU_H
+
+#include "blargg_common.h"
+
+typedef blargg_long nes_time_t; // CPU clock cycle count
+typedef unsigned nes_addr_t; // 16-bit memory address
+
+#include "Nes_Oscs.h"
+
+struct apu_state_t;
+class Nes_Buffer;
+
+class Nes_Apu {
+public:
+	// Set buffer to generate all sound into, or disable sound if NULL
+	void output( Blip_Buffer* );
+	
+	// Set memory reader callback used by DMC oscillator to fetch samples.
+	// When callback is invoked, 'user_data' is passed unchanged as the
+	// first parameter.
+	void dmc_reader( int (*callback)( void* user_data, nes_addr_t ), void* user_data = NULL );
+	
+	// All time values are the number of CPU clock cycles relative to the
+	// beginning of the current time frame. Before resetting the CPU clock
+	// count, call end_frame( last_cpu_time ).
+	
+	// Write to register (0x4000-0x4017, except 0x4014 and 0x4016)
+	enum { start_addr = 0x4000 };
+	enum { end_addr   = 0x4017 };
+	void write_register( nes_time_t, nes_addr_t, int data );
+	
+	// Read from status register at 0x4015
+	enum { status_addr = 0x4015 };
+	int read_status( nes_time_t );
+	
+	// Run all oscillators up to specified time, end current time frame, then
+	// start a new time frame at time 0. Time frames have no effect on emulation
+	// and each can be whatever length is convenient.
+	void end_frame( nes_time_t );
+	
+// Additional optional features (can be ignored without any problem)
+
+	// Reset internal frame counter, registers, and all oscillators.
+	// Use PAL timing if pal_mode is true, otherwise use NTSC timing.
+	// Set the DMC oscillator's initial DAC value to initial_dmc_dac without
+	// any audible click.
+	void reset( bool pal_mode = false, int initial_dmc_dac = 0 );
+	
+	// Adjust frame period
+	void set_tempo( double );
+	
+	// Save/load exact emulation state
+	void save_state( apu_state_t* out ) const;
+	void load_state( apu_state_t const& );
+	
+	// Set overall volume (default is 1.0)
+	void volume( double );
+	
+	// Set treble equalization (see notes.txt)
+	void treble_eq( const blip_eq_t& );
+	
+	// Set sound output of specific oscillator to buffer. If buffer is NULL,
+	// the specified oscillator is muted and emulation accuracy is reduced.
+	// The oscillators are indexed as follows: 0) Square 1, 1) Square 2,
+	// 2) Triangle, 3) Noise, 4) DMC.
+	enum { osc_count = 5 };
+	void osc_output( int index, Blip_Buffer* buffer );
+	
+	// Set IRQ time callback that is invoked when the time of earliest IRQ
+	// may have changed, or NULL to disable. When callback is invoked,
+	// 'user_data' is passed unchanged as the first parameter.
+	void irq_notifier( void (*callback)( void* user_data ), void* user_data = NULL );
+	
+	// Get time that APU-generated IRQ will occur if no further register reads
+	// or writes occur. If IRQ is already pending, returns irq_waiting. If no
+	// IRQ will occur, returns no_irq.
+	enum { no_irq = INT_MAX / 2 + 1 };
+	enum { irq_waiting = 0 };
+	nes_time_t earliest_irq( nes_time_t ) const; // the inline definition ignores its argument
+	
+	// Count number of DMC reads that would occur if 'run_until( t )' were executed.
+	// If last_read is not NULL, set *last_read to the earliest time that
+	// 'count_dmc_reads( time )' would result in the same result.
+	int count_dmc_reads( nes_time_t t, nes_time_t* last_read = NULL ) const;
+	
+	// Time when next DMC memory read will occur
+	nes_time_t next_dmc_read_time() const;
+	
+	// Run DMC until specified time, so that any DMC memory reads can be
+	// accounted for (i.e. inserting CPU wait states).
+	void run_until( nes_time_t );
+	
+public:
+	Nes_Apu();
+	BLARGG_DISABLE_NOTHROW
+private:
+	friend class Nes_Nonlinearizer;
+	void enable_nonlinear( double volume );
+	static double nonlinear_tnd_gain() { return 0.75; }
+private:
+	friend struct Nes_Dmc;
+	
+	// noncopyable: the copy operations are declared private
+	Nes_Apu( const Nes_Apu& );
+	Nes_Apu& operator = ( const Nes_Apu& );
+	
+	Nes_Osc*            oscs [osc_count]; // the five oscillators below, in osc_output() index order
+	Nes_Square          square1;
+	Nes_Square          square2;
+	Nes_Noise           noise;
+	Nes_Triangle        triangle;
+	Nes_Dmc             dmc;
+	
+	double tempo_; // last value given to set_tempo(); 1.0 at construction
+	nes_time_t last_time; // has been run until this time in current frame
+	nes_time_t last_dmc_time; // DMC has been run until this time in current frame
+	nes_time_t earliest_irq_; // value reported by earliest_irq(); maintained by irq_changed()
+	nes_time_t next_irq; // time of the next frame IRQ, or no_irq
+	int frame_period; // base frame length in CPU cycles; recomputed by set_tempo()
+	int frame_delay; // cycles until frame counter runs next
+	int frame; // current frame (0-3)
+	int osc_enables; // last value written to 0x4015
+	int frame_mode; // last value written to 0x4017
+	bool irq_flag; // frame IRQ is currently flagged
+	void (*irq_notifier_)( void* user_data );
+	void* irq_data; // passed back to irq_notifier_; assigned only in irq_notifier()
+	Nes_Square::Synth square_synth; // shared by squares
+	
+	void irq_changed();
+	void state_restored();
+	void run_until_( nes_time_t );
+	
+	// TODO: remove
+	friend class Nes_Core;
+};
+
+inline void Nes_Apu::osc_output( int osc, Blip_Buffer* buf )
+{
+	assert( (unsigned) osc < osc_count ); // the unsigned compare also rejects negative indices
+	oscs [osc]->output = buf;
+}
+
+inline nes_time_t Nes_Apu::earliest_irq( nes_time_t ) const
+{
+	return earliest_irq_; // cached value; the time argument is not used
+}
+
+inline void Nes_Apu::dmc_reader( int (*func)( void*, nes_addr_t ), void* user_data )
+{
+	dmc.prg_reader_data = user_data; // both values are stored on the DMC oscillator itself
+	dmc.prg_reader = func;
+}
+
+inline void Nes_Apu::irq_notifier( void (*func)( void* user_data ), void* user_data )
+{
+	irq_notifier_ = func; // irq_changed() skips the call while this is NULL
+	irq_data = user_data;
+}
+
+inline int Nes_Apu::count_dmc_reads( nes_time_t time, nes_time_t* last_read ) const
+{
+	return dmc.count_reads( time, last_read ); // thin wrapper; the DMC oscillator does the counting
+}
+	
+inline nes_time_t Nes_Dmc::next_read_time() const
+{
+	if ( length_counter == 0 )
+		return Nes_Apu::no_irq; // not reading
+	// measured from the time the APU last ran the DMC: current delay plus one period per bit after the first
+	return apu->last_dmc_time + delay + long (bits_remain - 1) * period;
+}
+
+inline nes_time_t Nes_Apu::next_dmc_read_time() const { return dmc.next_read_time(); } // Nes_Apu::no_irq when the DMC is not reading
+
+#endif
diff --git a/libraries/game-music-emu/gme/Nes_Cpu.cpp b/libraries/game-music-emu/gme/Nes_Cpu.cpp
new file mode 100644
index 000000000..5eb0862a3
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nes_Cpu.cpp
@@ -0,0 +1,1073 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Nes_Cpu.h"
+
+#include "blargg_endian.h"
+#include <limits.h>
+
+#define BLARGG_CPU_X86 1
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#ifdef BLARGG_ENABLE_OPTIMIZER
+	#include BLARGG_ENABLE_OPTIMIZER
+#endif
+
+#define FLUSH_TIME()    (void) (s.time = s_time) // store run()'s local clock copy back into the state struct
+#define CACHE_TIME()    (void) (s_time = s.time) // reload the local clock copy from the state struct
+
+#include "nes_cpu_io.h"
+
+#include "blargg_source.h"
+
+#ifndef CPU_DONE // fallback, used only when nothing included earlier defined CPU_DONE
+	#define CPU_DONE( cpu, time, result_out )   { result_out = -1; }
+#endif
+
+#ifndef CPU_READ_PPU // fallback: a plain CPU_READ with the clock flushed before and reloaded after
+	#define CPU_READ_PPU( cpu, addr, out, time )\
+	{\
+		FLUSH_TIME();\
+		out = CPU_READ( cpu, addr, time );\
+		CACHE_TIME();\
+	}
+#endif
+
+#if BLARGG_NONPORTABLE // page pointers are biased by the page's base address, so the full address is the index
+	#define PAGE_OFFSET( addr ) (addr)
+#else // portable form: index each page by the address bits below page_size
+	#define PAGE_OFFSET( addr ) ((addr) & (page_size - 1))
+#endif
+
+inline void Nes_Cpu::set_code_page( int i, void const* p )
+{
+	state->code_map [i] = (uint8_t const*) p - PAGE_OFFSET( i * page_size ); // bias so that indexing with PAGE_OFFSET( addr ) lands inside p
+}
+
+int const st_n = 0x80; // negative
+int const st_v = 0x40; // overflow
+int const st_r = 0x20; // bit 5; PHP pushes it set, together with st_b
+int const st_b = 0x10; // break
+int const st_d = 0x08; // decimal
+int const st_i = 0x04; // interrupt disable
+int const st_z = 0x02; // zero
+int const st_c = 0x01; // carry
+
+void Nes_Cpu::reset( void const* unmapped_page ) // clears registers and timing, then rebuilds the code map
+{
+	check( state == &state_ ); // run() points state at a local copy while it executes
+	state = &state_;
+	r.status = st_i; // only the I flag starts out set
+	r.sp = 0xFF;
+	r.pc = 0;
+	r.a  = 0;
+	r.x  = 0;
+	r.y  = 0;
+	state_.time = 0;
+	state_.base = 0;
+	irq_time_ = future_nes_time;
+	end_time_ = future_nes_time;
+	error_count_ = 0;
+	// code map: 0x0000-0x1FFF mirrors low_mem four times, 0x2000-0xFFFF reads unmapped_page
+	assert( page_size == 0x800 ); // assumes this
+	set_code_page( page_count, unmapped_page ); // one entry past the last real page (presumably for reads that run off the end of memory; confirm)
+	map_code( 0x2000, 0xE000, unmapped_page, true );
+	map_code( 0x0000, 0x2000, low_mem, true );
+	
+	blargg_verify_byte_order();
+}
+
+void Nes_Cpu::map_code( nes_addr_t start, unsigned size, void const* data, bool mirror )
+{
+	// both ends of the range have to fall on page boundaries within 64K
+	require( start % page_size == 0 );
+	require( size % page_size == 0 );
+	require( start + size <= 0x10000 );
+	
+	// With mirror set, every page aliases the same first page_size bytes of
+	// data; otherwise page i is backed by the i-th page_size slice of data.
+	char const* src = (char const*) data;
+	unsigned const first_page = start / page_size;
+	unsigned const page_total = size / page_size;
+	for ( unsigned i = 0; i < page_total; ++i )
+		set_code_page( first_page + i, mirror ? src : src + i * page_size );
+}
+
+#define TIME    (s_time + s.base) // time value handed to the CPU_READ/CPU_WRITE hooks
+#define READ_LIKELY_PPU( addr, out )    {CPU_READ_PPU( this, (addr), out, TIME );}
+#define READ( addr )                    CPU_READ( this, (addr), TIME )
+#define WRITE( addr, data )             {CPU_WRITE( this, (addr), (data), TIME );}
+#define READ_LOW( addr )        (low_mem [int (addr)]) // direct low_mem access, bypassing the I/O hooks
+#define WRITE_LOW( addr, data ) (void) (READ_LOW( addr ) = (data))
+#define READ_PROG( addr )       (s.code_map [(addr) >> page_bits] [PAGE_OFFSET( addr )]) // read through the code map
+
+#define SET_SP( v )     (sp = ((v) + 1) | 0x100) // local sp is kept one above the register value, with bit 8 set
+#define GET_SP()        ((sp - 1) & 0xFF) // inverse of SET_SP: recover the 8-bit register value
+#define PUSH( v )       ((sp = (sp - 1) | 0x100), WRITE_LOW( sp, v )) // pre-decrement, then store at low_mem [sp]
+
+bool Nes_Cpu::run( nes_time_t end_time )
+{
+	set_end_time( end_time );
+	state_t s = this->state_;
+	this->state = &s;
+	// even on x86, using s.time in place of s_time was slower
+	int16_t s_time = s.time;
+	
+	// registers
+	uint16_t pc = r.pc;
+	uint8_t a = r.a;
+	uint8_t x = r.x;
+	uint8_t y = r.y;
+	uint16_t sp;
+	SET_SP( r.sp );
+	
+	// status flags
+	#define IS_NEG (nz & 0x8080)
+	
+	#define CALC_STATUS( out ) do {\
+		out = status & (st_v | st_d | st_i);\
+		out |= ((nz >> 8) | nz) & st_n;\
+		out |= c >> 8 & st_c;\
+		if ( !(nz & 0xFF) ) out |= st_z;\
+	} while ( 0 )
+
+	#define SET_STATUS( in ) do {\
+		status = in & (st_v | st_d | st_i);\
+		nz = in << 8;\
+		c = nz;\
+		nz |= ~in & st_z;\
+	} while ( 0 )
+	
+	uint8_t status;
+	uint16_t c;  // carry set if (c & 0x100) != 0
+	uint16_t nz; // Z set if (nz & 0xFF) == 0, N set if (nz & 0x8080) != 0
+	{
+		uint8_t temp = r.status;
+		SET_STATUS( temp );
+	}
+	
+	goto loop;
+dec_clock_loop:
+	s_time--;
+loop:
+	
+	check( (unsigned) GET_SP() < 0x100 );
+	check( (unsigned) pc < 0x10000 );
+	check( (unsigned) a < 0x100 );
+	check( (unsigned) x < 0x100 );
+	check( (unsigned) y < 0x100 );
+	check( -32768 <= s_time && s_time < 32767 );
+	
+	uint8_t const* instr = s.code_map [pc >> page_bits];
+	uint8_t opcode;
+	
+	// TODO: eliminate this special case
+	#if BLARGG_NONPORTABLE
+		opcode = instr [pc];
+		pc++;
+		instr += pc;
+	#else
+		instr += PAGE_OFFSET( pc );
+		opcode = *instr++;
+		pc++;
+	#endif
+	
+	static uint8_t const clock_table [256] =
+	{// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+		0,6,2,8,3,3,5,5,3,2,2,2,4,4,6,6,// 0
+		3,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// 1
+		6,6,2,8,3,3,5,5,4,2,2,2,4,4,6,6,// 2
+		3,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// 3
+		6,6,2,8,3,3,5,5,3,2,2,2,3,4,6,6,// 4
+		3,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// 5
+		6,6,2,8,3,3,5,5,4,2,2,2,5,4,6,6,// 6
+		3,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// 7
+		2,6,2,6,3,3,3,3,2,2,2,2,4,4,4,4,// 8
+		3,6,2,6,4,4,4,4,2,5,2,5,5,5,5,5,// 9
+		2,6,2,6,3,3,3,3,2,2,2,2,4,4,4,4,// A
+		3,5,2,5,4,4,4,4,2,4,2,4,4,4,4,4,// B
+		2,6,2,8,3,3,5,5,2,2,2,2,4,4,6,6,// C
+		3,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// D
+		2,6,2,8,3,3,5,5,2,2,2,2,4,4,6,6,// E
+		3,5,0,8,4,4,6,6,2,4,2,7,4,4,7,7 // F
+	}; // 0x00 was 7 and 0xF2 was 2
+	
+	uint16_t data;
+	
+#if !BLARGG_CPU_X86
+	if ( s_time >= 0 )
+		goto out_of_time;
+	s_time += clock_table [opcode];
+	
+	data = *instr;
+	
+	switch ( opcode )
+	{
+#else
+
+	data = clock_table [opcode];
+	if ( (s_time += data) >= 0 )
+		goto possibly_out_of_time;
+almost_out_of_time:
+	
+	data = *instr;
+	
+	switch ( opcode )
+	{
+possibly_out_of_time:
+		if ( s_time < (int) data )
+			goto almost_out_of_time;
+		s_time -= data;
+		goto out_of_time;
+#endif
+
+// Macros
+
+#define GET_MSB()   (instr [1])
+#define ADD_PAGE()  (pc++, data += 0x100 * GET_MSB())
+#define GET_ADDR()  GET_LE16( instr )
+
+#define NO_PAGE_CROSSING( lsb )
+#define HANDLE_PAGE_CROSSING( lsb ) s_time += (lsb) >> 8;
+
+#define INC_DEC_XY( reg, n ) reg = uint8_t (nz = reg + n); goto loop;
+
+#define IND_Y( cross, out ) {\
+		uint16_t temp = READ_LOW( data ) + y;\
+		out = temp + 0x100 * READ_LOW( uint8_t (data + 1) );\
+		cross( temp );\
+	}
+	
+#define IND_X( out ) {\
+		uint16_t temp = data + x;\
+		out = 0x100 * READ_LOW( uint8_t (temp + 1) ) + READ_LOW( uint8_t (temp) );\
+	}
+	
+#define ARITH_ADDR_MODES( op )\
+case op - 0x04: /* (ind,x) */\
+	IND_X( data )\
+	goto ptr##op;\
+case op + 0x0C: /* (ind),y */\
+	IND_Y( HANDLE_PAGE_CROSSING, data )\
+	goto ptr##op;\
+case op + 0x10: /* zp,X */\
+	data = uint8_t (data + x);\
+case op + 0x00: /* zp */\
+	data = READ_LOW( data );\
+	goto imm##op;\
+case op + 0x14: /* abs,Y */\
+	data += y;\
+	goto ind##op;\
+case op + 0x18: /* abs,X */\
+	data += x;\
+ind##op:\
+	HANDLE_PAGE_CROSSING( data );\
+case op + 0x08: /* abs */\
+	ADD_PAGE();\
+ptr##op:\
+	FLUSH_TIME();\
+	data = READ( data );\
+	CACHE_TIME();\
+case op + 0x04: /* imm */\
+imm##op:
+
+// TODO: more efficient way to handle negative branch that wraps PC around
+#define BRANCH( cond )\
+{\
+	int16_t offset = (int8_t) data;\
+	uint16_t extra_clock = (++pc & 0xFF) + offset;\
+	if ( !(cond) ) goto dec_clock_loop;\
+	pc = uint16_t (pc + offset);\
+	s_time += extra_clock >> 8 & 1;\
+	goto loop;\
+}
+
+// Often-Used
+
+	case 0xB5: // LDA zp,x
+		a = nz = READ_LOW( uint8_t (data + x) );
+		pc++;
+		goto loop;
+	
+	case 0xA5: // LDA zp
+		a = nz = READ_LOW( data );
+		pc++;
+		goto loop;
+	
+	case 0xD0: // BNE
+		BRANCH( (uint8_t) nz );
+	
+	case 0x20: { // JSR
+		uint16_t temp = pc + 1;
+		pc = GET_ADDR();
+		WRITE_LOW( 0x100 | (sp - 1), temp >> 8 );
+		sp = (sp - 2) | 0x100;
+		WRITE_LOW( sp, temp );
+		goto loop;
+	}
+	
+	case 0x4C: // JMP abs
+		pc = GET_ADDR();
+		goto loop;
+	
+	case 0xE8: // INX
+		INC_DEC_XY( x, 1 )
+	
+	case 0x10: // BPL
+		BRANCH( !IS_NEG )
+	
+	ARITH_ADDR_MODES( 0xC5 ) // CMP
+		nz = a - data;
+		pc++;
+		c = ~nz;
+		nz &= 0xFF;
+		goto loop;
+	
+	case 0x30: // BMI
+		BRANCH( IS_NEG )
+	
+	case 0xF0: // BEQ
+		BRANCH( !(uint8_t) nz );
+	
+	case 0x95: // STA zp,x
+		data = uint8_t (data + x);
+	case 0x85: // STA zp
+		pc++;
+		WRITE_LOW( data, a );
+		goto loop;
+	
+	case 0xC8: // INY
+		INC_DEC_XY( y, 1 )
+
+	case 0xA8: // TAY
+		y  = a;
+		nz = a;
+		goto loop;
+	
+	case 0x98: // TYA
+		a  = y;
+		nz = y;
+		goto loop;
+	
+	case 0xAD:{// LDA abs
+		unsigned addr = GET_ADDR();
+		pc += 2;
+		READ_LIKELY_PPU( addr, nz );
+		a = nz;
+		goto loop;
+	}
+	
+	case 0x60: // RTS
+		pc = 1 + READ_LOW( sp );
+		pc += 0x100 * READ_LOW( 0x100 | (sp - 0xFF) );
+		sp = (sp - 0xFE) | 0x100;
+		goto loop;
+	
+	{
+		uint16_t addr;
+		
+	case 0x99: // STA abs,Y
+		addr = y + GET_ADDR();
+		pc += 2;
+		if ( addr <= 0x7FF )
+		{
+			WRITE_LOW( addr, a );
+			goto loop;
+		}
+		goto sta_ptr;
+	
+	case 0x8D: // STA abs
+		addr = GET_ADDR();
+		pc += 2;
+		if ( addr <= 0x7FF )
+		{
+			WRITE_LOW( addr, a );
+			goto loop;
+		}
+		goto sta_ptr;
+	
+	case 0x9D: // STA abs,X (slightly more common than STA abs)
+		addr = x + GET_ADDR();
+		pc += 2;
+		if ( addr <= 0x7FF )
+		{
+			WRITE_LOW( addr, a );
+			goto loop;
+		}
+	sta_ptr:
+		FLUSH_TIME();
+		WRITE( addr, a );
+		CACHE_TIME();
+		goto loop;
+		
+	case 0x91: // STA (ind),Y
+		IND_Y( NO_PAGE_CROSSING, addr )
+		pc++;
+		goto sta_ptr;
+	
+	case 0x81: // STA (ind,X)
+		IND_X( addr )
+		pc++;
+		goto sta_ptr;
+	
+	}
+	
+	case 0xA9: // LDA #imm
+		pc++;
+		a  = data;
+		nz = data;
+		goto loop;
+
+	// common read instructions
+	{
+		uint16_t addr;
+		
+	case 0xA1: // LDA (ind,X)
+		IND_X( addr )
+		pc++;
+		goto a_nz_read_addr;
+	
+	case 0xB1:// LDA (ind),Y
+		addr = READ_LOW( data ) + y;
+		HANDLE_PAGE_CROSSING( addr );
+		addr += 0x100 * READ_LOW( (uint8_t) (data + 1) );
+		pc++;
+		a = nz = READ_PROG( addr );
+		if ( (addr ^ 0x8000) <= 0x9FFF )
+			goto loop;
+		goto a_nz_read_addr;
+	
+	case 0xB9: // LDA abs,Y
+		HANDLE_PAGE_CROSSING( data + y );
+		addr = GET_ADDR() + y;
+		pc += 2;
+		a = nz = READ_PROG( addr );
+		if ( (addr ^ 0x8000) <= 0x9FFF )
+			goto loop;
+		goto a_nz_read_addr;
+	
+	case 0xBD: // LDA abs,X
+		HANDLE_PAGE_CROSSING( data + x );
+		addr = GET_ADDR() + x;
+		pc += 2;
+		a = nz = READ_PROG( addr );
+		if ( (addr ^ 0x8000) <= 0x9FFF )
+			goto loop;
+	a_nz_read_addr:
+		FLUSH_TIME();
+		a = nz = READ( addr );
+		CACHE_TIME();
+		goto loop;
+	
+	}
+
+// Branch
+
+	case 0x50: // BVC
+		BRANCH( !(status & st_v) )
+	
+	case 0x70: // BVS
+		BRANCH( status & st_v )
+	
+	case 0xB0: // BCS
+		BRANCH( c & 0x100 )
+	
+	case 0x90: // BCC
+		BRANCH( !(c & 0x100) )
+	
+// Load/store
+	
+	case 0x94: // STY zp,x
+		data = uint8_t (data + x);
+	case 0x84: // STY zp
+		pc++;
+		WRITE_LOW( data, y );
+		goto loop;
+	
+	case 0x96: // STX zp,y
+		data = uint8_t (data + y);
+	case 0x86: // STX zp
+		pc++;
+		WRITE_LOW( data, x );
+		goto loop;
+	
+	case 0xB6: // LDX zp,y
+		data = uint8_t (data + y);
+	case 0xA6: // LDX zp
+		data = READ_LOW( data );
+	case 0xA2: // LDX #imm
+		pc++;
+		x = data;
+		nz = data;
+		goto loop;
+	
+	case 0xB4: // LDY zp,x
+		data = uint8_t (data + x);
+	case 0xA4: // LDY zp
+		data = READ_LOW( data );
+	case 0xA0: // LDY #imm
+		pc++;
+		y = data;
+		nz = data;
+		goto loop;
+	
+	case 0xBC: // LDY abs,X
+		data += x;
+		HANDLE_PAGE_CROSSING( data );
+	case 0xAC:{// LDY abs
+		unsigned addr = data + 0x100 * GET_MSB();
+		pc += 2;
+		FLUSH_TIME();
+		y = nz = READ( addr );
+		CACHE_TIME();
+		goto loop;
+	}
+	
+	case 0xBE: // LDX abs,y
+		data += y;
+		HANDLE_PAGE_CROSSING( data );
+	case 0xAE:{// LDX abs
+		unsigned addr = data + 0x100 * GET_MSB();
+		pc += 2;
+		FLUSH_TIME();
+		x = nz = READ( addr );
+		CACHE_TIME();
+		goto loop;
+	}
+	
+	{
+		uint8_t temp;
+	case 0x8C: // STY abs
+		temp = y;
+		goto store_abs;
+	
+	case 0x8E: // STX abs
+		temp = x;
+	store_abs:
+		unsigned addr = GET_ADDR();
+		pc += 2;
+		if ( addr <= 0x7FF )
+		{
+			WRITE_LOW( addr, temp );
+			goto loop;
+		}
+		FLUSH_TIME();
+		WRITE( addr, temp );
+		CACHE_TIME();
+		goto loop;
+	}
+
+// Compare
+
+	case 0xEC:{// CPX abs
+		unsigned addr = GET_ADDR();
+		pc++;
+		FLUSH_TIME();
+		data = READ( addr );
+		CACHE_TIME();
+		goto cpx_data;
+	}
+	
+	case 0xE4: // CPX zp
+		data = READ_LOW( data );
+	case 0xE0: // CPX #imm
+	cpx_data:
+		nz = x - data;
+		pc++;
+		c = ~nz;
+		nz &= 0xFF;
+		goto loop;
+	
+	case 0xCC:{// CPY abs
+		unsigned addr = GET_ADDR();
+		pc++;
+		FLUSH_TIME();
+		data = READ( addr );
+		CACHE_TIME();
+		goto cpy_data;
+	}
+	
+	case 0xC4: // CPY zp
+		data = READ_LOW( data );
+	case 0xC0: // CPY #imm
+	cpy_data:
+		nz = y - data;
+		pc++;
+		c = ~nz;
+		nz &= 0xFF;
+		goto loop;
+	
+// Logical
+
+	ARITH_ADDR_MODES( 0x25 ) // AND
+		nz = (a &= data);
+		pc++;
+		goto loop;
+	
+	ARITH_ADDR_MODES( 0x45 ) // EOR
+		nz = (a ^= data);
+		pc++;
+		goto loop;
+	
+	ARITH_ADDR_MODES( 0x05 ) // ORA
+		nz = (a |= data);
+		pc++;
+		goto loop;
+	
+	case 0x2C:{// BIT abs
+		unsigned addr = GET_ADDR();
+		pc += 2;
+		status &= ~st_v;
+		READ_LIKELY_PPU( addr, nz );
+		status |= nz & st_v;
+		if ( a & nz )
+			goto loop;
+		nz <<= 8; // result must be zero, even if N bit is set
+		goto loop;
+	}
+	
+	case 0x24: // BIT zp
+		nz = READ_LOW( data );
+		pc++;
+		status &= ~st_v;
+		status |= nz & st_v;
+		if ( a & nz )
+			goto loop;
+		nz <<= 8; // result must be zero, even if N bit is set
+		goto loop;
+		
+// Add/subtract
+
+	ARITH_ADDR_MODES( 0xE5 ) // SBC
+	case 0xEB: // unofficial equivalent
+		data ^= 0xFF;
+		goto adc_imm;
+	
+	ARITH_ADDR_MODES( 0x65 ) // ADC
+	adc_imm: {
+		int16_t carry = c >> 8 & 1;
+		int16_t ov = (a ^ 0x80) + carry + (int8_t) data; // sign-extend
+		status &= ~st_v;
+		status |= ov >> 2 & 0x40;
+		c = nz = a + data + carry;
+		pc++;
+		a = (uint8_t) nz;
+		goto loop;
+	}
+	
+// Shift/rotate
+
+	case 0x4A: // LSR A
+		c = 0;
+	case 0x6A: // ROR A
+		nz = c >> 1 & 0x80;
+		c = a << 8;
+		nz |= a >> 1;
+		a = nz;
+		goto loop;
+
+	case 0x0A: // ASL A
+		nz = a << 1;
+		c = nz;
+		a = (uint8_t) nz;
+		goto loop;
+
+	case 0x2A: { // ROL A
+		nz = a << 1;
+		int16_t temp = c >> 8 & 1;
+		c = nz;
+		nz |= temp;
+		a = (uint8_t) nz;
+		goto loop;
+	}
+	
+	case 0x5E: // LSR abs,X
+		data += x;
+	case 0x4E: // LSR abs
+		c = 0;
+	case 0x6E: // ROR abs
+	ror_abs: {
+		ADD_PAGE();
+		FLUSH_TIME();
+		int temp = READ( data );
+		nz = (c >> 1 & 0x80) | (temp >> 1);
+		c = temp << 8;
+		goto rotate_common;
+	}
+	
+	case 0x3E: // ROL abs,X
+		data += x;
+		goto rol_abs;
+	
+	case 0x1E: // ASL abs,X
+		data += x;
+	case 0x0E: // ASL abs
+		c = 0;
+	case 0x2E: // ROL abs
+	rol_abs:
+		ADD_PAGE();
+		nz = c >> 8 & 1;
+		FLUSH_TIME();
+		nz |= (c = READ( data ) << 1);
+	rotate_common:
+		pc++;
+		WRITE( data, (uint8_t) nz );
+		CACHE_TIME();
+		goto loop;
+	
+	case 0x7E: // ROR abs,X
+		data += x;
+		goto ror_abs;
+	
+	case 0x76: // ROR zp,x
+		data = uint8_t (data + x);
+		goto ror_zp;
+	
+	case 0x56: // LSR zp,x
+		data = uint8_t (data + x);
+	case 0x46: // LSR zp
+		c = 0;
+	case 0x66: // ROR zp
+	ror_zp: {
+		int temp = READ_LOW( data );
+		nz = (c >> 1 & 0x80) | (temp >> 1);
+		c = temp << 8;
+		goto write_nz_zp;
+	}
+	
+	case 0x36: // ROL zp,x
+		data = uint8_t (data + x);
+		goto rol_zp;
+	
+	case 0x16: // ASL zp,x
+		data = uint8_t (data + x);
+	case 0x06: // ASL zp
+		c = 0;
+	case 0x26: // ROL zp
+	rol_zp:
+		nz = c >> 8 & 1;
+		nz |= (c = READ_LOW( data ) << 1);
+		goto write_nz_zp;
+	
+// Increment/decrement
+
+	case 0xCA: // DEX
+		INC_DEC_XY( x, -1 )
+	
+	case 0x88: // DEY
+		INC_DEC_XY( y, -1 )
+	
+	case 0xF6: // INC zp,x
+		data = uint8_t (data + x);
+	case 0xE6: // INC zp
+		nz = 1;
+		goto add_nz_zp;
+	
+	case 0xD6: // DEC zp,x
+		data = uint8_t (data + x);
+	case 0xC6: // DEC zp
+		nz = (uint16_t) -1;
+	add_nz_zp:
+		nz += READ_LOW( data );
+	write_nz_zp:
+		pc++;
+		WRITE_LOW( data, nz );
+		goto loop;
+	
+	case 0xFE: // INC abs,x
+		data = x + GET_ADDR();
+		goto inc_ptr;
+	
+	case 0xEE: // INC abs
+		data = GET_ADDR();
+	inc_ptr:
+		nz = 1;
+		goto inc_common;
+	
+	case 0xDE: // DEC abs,x
+		data = x + GET_ADDR();
+		goto dec_ptr;
+	
+	case 0xCE: // DEC abs
+		data = GET_ADDR();
+	dec_ptr:
+		nz = (uint16_t) -1;
+	inc_common:
+		FLUSH_TIME();
+		nz += READ( data );
+		pc += 2;
+		WRITE( data, (uint8_t) nz );
+		CACHE_TIME();
+		goto loop;
+		
+// Transfer
+
+	case 0xAA: // TAX
+		x  = a;
+		nz = a;
+		goto loop;
+		
+	case 0x8A: // TXA
+		a  = x;
+		nz = x;
+		goto loop;
+
+	case 0x9A: // TXS
+		SET_SP( x ); // verified (no flag change)
+		goto loop;
+	
+	case 0xBA: // TSX
+		x = nz = GET_SP();
+		goto loop;
+	
+// Stack
+	
+	case 0x48: // PHA
+		PUSH( a ); // verified
+		goto loop;
+		
+	case 0x68: // PLA
+		a = nz = READ_LOW( sp );
+		sp = (sp - 0xFF) | 0x100;
+		goto loop;
+		
+	case 0x40:{// RTI
+		uint8_t temp = READ_LOW( sp );
+		pc  = READ_LOW( 0x100 | (sp - 0xFF) );
+		pc |= READ_LOW( 0x100 | (sp - 0xFE) ) * 0x100;
+		sp = (sp - 0xFD) | 0x100;
+		data = status;
+		SET_STATUS( temp );
+		if ( !((data ^ status) & st_i) ) goto loop; // I flag didn't change
+		this->r.status = status; // update externally-visible I flag
+		blargg_long delta = s.base - irq_time_;
+		if ( delta <= 0 ) goto loop;
+		if ( status & st_i ) goto loop;
+		s_time += delta;
+		s.base = irq_time_;
+		goto loop;
+	}
+	
+	case 0x28:{// PLP
+		uint8_t temp = READ_LOW( sp );
+		sp = (sp - 0xFF) | 0x100;
+		uint8_t changed = status ^ temp;
+		SET_STATUS( temp );
+		if ( !(changed & st_i) )
+			goto loop; // I flag didn't change
+		if ( status & st_i )
+			goto handle_sei;
+		goto handle_cli;
+	}
+	
+	case 0x08: { // PHP
+		uint8_t temp;
+		CALC_STATUS( temp );
+		PUSH( temp | (st_b | st_r) );
+		goto loop;
+	}
+	
+	case 0x6C:{// JMP (ind)
+		data = GET_ADDR();
+		check( unsigned (data - 0x2000) >= 0x4000 ); // ensure it's outside I/O space
+		uint8_t const* page = s.code_map [data >> page_bits];
+		pc = page [PAGE_OFFSET( data )];
+		data = (data & 0xFF00) | ((data + 1) & 0xFF);
+		pc |= page [PAGE_OFFSET( data )] << 8;
+		goto loop;
+	}
+	
+	case 0x00: // BRK
+		goto handle_brk;
+	
+// Flags
+
+	case 0x38: // SEC
+		c = (uint16_t) ~0;
+		goto loop;
+	
+	case 0x18: // CLC
+		c = 0;
+		goto loop;
+		
+	case 0xB8: // CLV
+		status &= ~st_v;
+		goto loop;
+	
+	case 0xD8: // CLD
+		status &= ~st_d;
+		goto loop;
+	
+	case 0xF8: // SED
+		status |= st_d;
+		goto loop;
+	
+	case 0x58: // CLI
+		if ( !(status & st_i) )
+			goto loop;
+		status &= ~st_i;
+	handle_cli: {
+		//debug_printf( "CLI at %d\n", TIME );
+		this->r.status = status; // update externally-visible I flag
+		blargg_long delta = s.base - irq_time_;
+		if ( delta <= 0 )
+		{
+			if ( TIME < irq_time_ )
+				goto loop;
+			goto delayed_cli;
+		}
+		s.base = irq_time_;
+		s_time += delta;
+		if ( s_time < 0 )
+			goto loop;
+		
+		if ( delta >= s_time + 1 )
+		{
+			s.base += s_time + 1;
+			s_time = -1;
+			goto loop;
+		}
+		
+		// TODO: implement
+	delayed_cli:
+		debug_printf( "Delayed CLI not emulated\n" );
+		goto loop;
+	}
+	
+	case 0x78: // SEI
+		if ( status & st_i )
+			goto loop;
+		status |= st_i;
+	handle_sei: {
+		this->r.status = status; // update externally-visible I flag
+		blargg_long delta = s.base - end_time_;
+		s.base = end_time_;
+		s_time += delta;
+		if ( s_time < 0 )
+			goto loop;
+		
+		debug_printf( "Delayed SEI not emulated\n" );
+		goto loop;
+	}
+	
+// Unofficial
+	
+	// SKW - Skip word
+	case 0x1C: case 0x3C: case 0x5C: case 0x7C: case 0xDC: case 0xFC:
+		HANDLE_PAGE_CROSSING( data + x );
+	case 0x0C:
+		pc++;
+	// SKB - Skip byte
+	case 0x74: case 0x04: case 0x14: case 0x34: case 0x44: case 0x54: case 0x64:
+	case 0x80: case 0x82: case 0x89: case 0xC2: case 0xD4: case 0xE2: case 0xF4:
+		pc++;
+		goto loop;
+	
+	// NOP
+	case 0xEA: case 0x1A: case 0x3A: case 0x5A: case 0x7A: case 0xDA: case 0xFA:
+		goto loop;
+
+	case bad_opcode: // HLT
+		pc--;
+	case 0x02: case 0x12: case 0x22: case 0x32: case 0x42: case 0x52:
+	case 0x62: case 0x72: case 0x92: case 0xB2: case 0xD2:
+		goto stop;
+	
+// Unimplemented
+	
+	case 0xFF: // force 256-entry jump table for optimization purposes
+		c |= 1;
+	default:
+		check( (unsigned) opcode <= 0xFF );
+		// skip over proper number of bytes
+		static unsigned char const illop_lens [8] = {
+			0x40, 0x40, 0x40, 0x80, 0x40, 0x40, 0x80, 0xA0
+		};
+		uint8_t opcode = instr [-1];
+		int16_t len = illop_lens [opcode >> 2 & 7] >> (opcode << 1 & 6) & 3;
+		if ( opcode == 0x9C )
+			len = 2;
+		pc += len;
+		error_count_++;
+		
+		if ( (opcode >> 4) == 0x0B )
+		{
+			if ( opcode == 0xB3 )
+				data = READ_LOW( data );
+			if ( opcode != 0xB7 )
+				HANDLE_PAGE_CROSSING( data + y );
+		}
+		goto loop;
+	}
+	assert( false );
+	
+	int result_;
+handle_brk:
+	pc++;
+	result_ = 4;
+	
+interrupt:
+	{
+		s_time += 7;
+		
+		WRITE_LOW( 0x100 | (sp - 1), pc >> 8 );
+		WRITE_LOW( 0x100 | (sp - 2), pc );
+		pc = GET_LE16( &READ_PROG( 0xFFFA ) + result_ );
+		
+		sp = (sp - 3) | 0x100;
+		uint8_t temp;
+		CALC_STATUS( temp );
+		temp |= st_r;
+		if ( result_ )
+			temp |= st_b; // TODO: incorrectly sets B flag for IRQ
+		WRITE_LOW( sp, temp );
+		
+		this->r.status = status |= st_i;
+		blargg_long delta = s.base - end_time_;
+		if ( delta >= 0 ) goto loop;
+		s_time += delta;
+		s.base = end_time_;
+		goto loop;
+	}
+	
+out_of_time:
+	pc--;
+	FLUSH_TIME();
+	CPU_DONE( this, TIME, result_ );
+	CACHE_TIME();
+	if ( result_ >= 0 )
+		goto interrupt;
+	if ( s_time < 0 )
+		goto loop;
+	
+stop:
+	
+	s.time = s_time;
+	
+	r.pc = pc;
+	r.sp = GET_SP();
+	r.a = a;
+	r.x = x;
+	r.y = y;
+	
+	{
+		uint8_t temp;
+		CALC_STATUS( temp );
+		r.status = temp;
+	}
+	
+	this->state_ = s;
+	this->state = &this->state_;
+	
+	return s_time < 0;
+}
+
diff --git a/libraries/game-music-emu/gme/Nes_Cpu.h b/libraries/game-music-emu/gme/Nes_Cpu.h
new file mode 100644
index 000000000..878b5ba5c
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nes_Cpu.h
@@ -0,0 +1,112 @@
+// NES 6502 CPU emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef NES_CPU_H
+#define NES_CPU_H
+
+#include "blargg_common.h"
+
+typedef blargg_long nes_time_t; // clock cycle count
+typedef unsigned nes_addr_t; // 16-bit address
+enum { future_nes_time = INT_MAX / 2 + 1 }; // presumably a "far in the future" time value -- TODO confirm against callers
+
+class Nes_Cpu { // 6502 core state: registers, 2KB RAM, paged code map and cycle/IRQ timing
+public:
+	// Clear registers, map low memory and its three mirrors to address 0,
+	// and mirror unmapped_page in remaining memory
+	void reset( void const* unmapped_page = 0 );
+	
+	// Map code memory (memory accessed via the program counter). Start and size
+	// must be multiple of page_size. If mirror is true, repeats code page
+	// throughout address range.
+	enum { page_size = 0x800 };
+	void map_code( nes_addr_t start, unsigned size, void const* code, bool mirror = false );
+	
+	// Access emulated memory as CPU does
+	uint8_t const* get_code( nes_addr_t );
+	
+	// 2KB of RAM at address 0
+	uint8_t low_mem [0x800];
+	
+	// NES 6502 registers. Not kept updated during a call to run().
+	struct registers_t {
+		uint16_t pc;
+		uint8_t a;
+		uint8_t x;
+		uint8_t y;
+		uint8_t status;
+		uint8_t sp;
+	};
+	registers_t r;
+	
+	// Set end_time and run CPU from current time. Returns true if execution
+	// stopped due to encountering bad_opcode.
+	bool run( nes_time_t end_time );
+	
+	// Time of beginning of next instruction to be executed
+	nes_time_t time() const             { return state->time + state->base; }
+	void set_time( nes_time_t t )       { state->time = t - state->base; } // stored relative to base
+	void adjust_time( int delta )       { state->time += delta; }
+	
+	nes_time_t irq_time() const         { return irq_time_; }
+	void set_irq_time( nes_time_t ); // also rebases state->base (see update_end_time)
+	
+	nes_time_t end_time() const         { return end_time_; }
+	void set_end_time( nes_time_t ); // also rebases state->base (see update_end_time)
+	
+	// Number of undefined instructions encountered and skipped
+	void clear_error_count()            { error_count_ = 0; }
+	unsigned long error_count() const   { return error_count_; }
+	
+	// CPU invokes bad opcode handler if it encounters this
+	enum { bad_opcode = 0xF2 };
+	
+public:
+	Nes_Cpu() { state = &state_; }
+	enum { page_bits = 11 }; // log2 of page_size: 1 << 11 == 0x800
+	enum { page_count = 0x10000 >> page_bits }; // number of pages in the 64KB address space
+	enum { irq_inhibit = 0x04 }; // r.status bit tested by update_end_time() before honoring irq_time_
+private:
+	struct state_t {
+		uint8_t const* code_map [page_count + 1]; // indexed by addr >> page_bits; use of the extra entry isn't visible here
+		nes_time_t base; // time() == base + time
+		int time; // current time relative to base
+	};
+	state_t* state; // points to state_ or a local copy within run()
+	state_t state_;
+	nes_time_t irq_time_;
+	nes_time_t end_time_;
+	unsigned long error_count_;
+	
+	void set_code_page( int, void const* );
+	inline int update_end_time( nes_time_t end, nes_time_t irq ); // moves state->base; returns old base - new base
+};
+
+inline uint8_t const* Nes_Cpu::get_code( nes_addr_t addr ) // pointer to the mapped code byte for addr
+{
+	return state->code_map [addr >> page_bits] + addr // page pointer is selected by the high address bits
+	#if !BLARGG_NONPORTABLE
+		% (unsigned) page_size // binds as addr % page_size, i.e. the offset within the page
+	#endif
+	;
+}
+
+inline int Nes_Cpu::update_end_time( nes_time_t t, nes_time_t irq )
+{
+	// an IRQ due before t becomes the new base, unless IRQs are inhibited
+	nes_time_t const new_base = (!(r.status & irq_inhibit) && irq < t) ? irq : t;
+	nes_time_t const old_base = state->base;
+	return old_base - (state->base = new_base); // base shift, for the caller to add to state->time
+}
+
+inline void Nes_Cpu::set_irq_time( nes_time_t t ) {
+	irq_time_ = t;
+	state->time += update_end_time( end_time_, t ); // offset by the base shift so time() is unchanged
+}
+
+inline void Nes_Cpu::set_end_time( nes_time_t t ) {
+	end_time_ = t;
+	state->time += update_end_time( t, irq_time_ ); // offset by the base shift so time() is unchanged
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Nes_Fme7_Apu.cpp b/libraries/game-music-emu/gme/Nes_Fme7_Apu.cpp
new file mode 100644
index 000000000..93973e409
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nes_Fme7_Apu.cpp
@@ -0,0 +1,121 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Nes_Fme7_Apu.h"
+
+#include <string.h>
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+// Zeroes the saved-state fields, last_time and each oscillator's last amplitude; output buffers are kept
+void Nes_Fme7_Apu::reset()
+{
+	fme7_apu_state_t& base = *this; // only the state base subobject is wiped
+	memset( &base, 0, sizeof base );
+	
+	last_time = 0;
+	for ( int n = osc_count; n--; )
+		oscs [n].last_amp = 0;
+}
+
+unsigned char const Nes_Fme7_Apu::amp_table [16] = // output amplitude for each 4-bit volume (run_until indexes it with vol_mode & 0x0F)
+{
+	#define ENTRY( n ) (unsigned char) (n * amp_range + 0.5) // scale a 0..1 fraction to 0..amp_range, rounding to nearest
+	ENTRY(0.0000), ENTRY(0.0078), ENTRY(0.0110), ENTRY(0.0156), // each non-zero step is about 1.41x the previous one
+	ENTRY(0.0221), ENTRY(0.0312), ENTRY(0.0441), ENTRY(0.0624),
+	ENTRY(0.0883), ENTRY(0.1249), ENTRY(0.1766), ENTRY(0.2498),
+	ENTRY(0.3534), ENTRY(0.4998), ENTRY(0.7070), ENTRY(1.0000)
+	#undef ENTRY
+};
+
+void Nes_Fme7_Apu::run_until( blip_time_t end_time ) // emits each oscillator's square wave from last_time up to end_time
+{
+	require( end_time >= last_time );
+	
+	for ( int index = 0; index < osc_count; index++ )
+	{
+		int mode = regs [7] >> index; // after the shift, octal bits 001 and 010 are this oscillator's two mode bits
+		int vol_mode = regs [010 + index]; // octal 010 == 8: registers 8..10 hold the per-oscillator volume/mode
+		int volume = amp_table [vol_mode & 0x0F];
+		
+		Blip_Buffer* const osc_output = oscs [index].output;
+		if ( !osc_output )
+			continue; // no buffer attached: this oscillator's phase and delay are not advanced
+		osc_output->set_modified();
+		
+		// check for unsupported mode
+		#ifndef NDEBUG
+			if ( (mode & 011) <= 001 && vol_mode & 0x1F )
+				debug_printf( "FME7 used unimplemented sound mode: %02X, vol_mode: %02X\n",
+						mode, vol_mode & 0x1F );
+		#endif
+		
+		if ( (mode & 001) | (vol_mode & 0x10) )
+			volume = 0; // noise and envelope aren't supported
+		
+		// period
+		int const period_factor = 16;
+		unsigned period = (regs [index * 2 + 1] & 0x0F) * 0x100 * period_factor +
+				regs [index * 2] * period_factor; // 12-bit register value times period_factor
+		if ( period < 50 ) // around 22 kHz
+		{
+			volume = 0;
+			if ( !period ) // on my AY-3-8910A, period doesn't have extra one added
+				period = period_factor; // also keeps the loops and division below from using a zero period
+		}
+		
+		// current amplitude
+		int amp = volume;
+		if ( !phases [index] )
+			amp = 0;
+		{
+			int delta = amp - oscs [index].last_amp;
+			if ( delta )
+			{
+				oscs [index].last_amp = amp;
+				synth.offset( last_time, delta, osc_output );
+			}
+		}
+		
+		blip_time_t time = last_time + delays [index]; // time of this oscillator's next transition
+		if ( time < end_time )
+		{
+			int delta = amp * 2 - volume; // +volume when currently high, -volume when low
+			if ( volume )
+			{
+				do
+				{
+					delta = -delta; // toggle the output level
+					synth.offset_inline( time, delta, osc_output );
+					time += period;
+				}
+				while ( time < end_time );
+				
+				oscs [index].last_amp = (delta + volume) >> 1; // maps +volume -> volume, -volume -> 0
+				phases [index] = (delta > 0);
+			}
+			else
+			{
+				// maintain phase when silent
+				int count = (end_time - time + period - 1) / period; // transitions skipped (ceiling division)
+				phases [index] ^= count & 1;
+				time += (blargg_long) count * period;
+			}
+		}
+		
+		delays [index] = time - end_time; // overshoot past end_time, carried into the next call
+	}
+	
+	last_time = end_time;
+}
+
diff --git a/libraries/game-music-emu/gme/Nes_Fme7_Apu.h b/libraries/game-music-emu/gme/Nes_Fme7_Apu.h
new file mode 100644
index 000000000..b79ed6f5e
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nes_Fme7_Apu.h
@@ -0,0 +1,131 @@
+// Sunsoft FME-7 sound emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef NES_FME7_APU_H
+#define NES_FME7_APU_H
+
+#include "blargg_common.h"
+#include "Blip_Buffer.h"
+
+struct fme7_apu_state_t // plain-data chip state; copied as a unit by save_state()/load_state()
+{
+	enum { reg_count = 14 };
+	uint8_t regs [reg_count]; // sound registers, written by write_data() at index latch
+	uint8_t phases [3]; // 0 or 1
+	uint8_t latch; // register index selected by write_latch()
+	uint16_t delays [3]; // a, b, c: clocks from last_time to each oscillator's next transition
+};
+
+class Nes_Fme7_Apu : private fme7_apu_state_t { // emulator object; its savable state is the private base
+public:
+	// See Nes_Apu.h for reference
+	void reset();
+	void volume( double );
+	void treble_eq( blip_eq_t const& );
+	void output( Blip_Buffer* ); // routes all oscillators to one buffer
+	enum { osc_count = 3 };
+	void osc_output( int index, Blip_Buffer* );
+	void end_frame( blip_time_t );
+	void save_state( fme7_apu_state_t* ) const;
+	void load_state( fme7_apu_state_t const& );
+	
+	// Mask and addresses of registers
+	enum { addr_mask = 0xE000 };
+	enum { data_addr = 0xE000 };
+	enum { latch_addr = 0xC000 };
+	
+	// (addr & addr_mask) == latch_addr
+	void write_latch( int );
+	
+	// (addr & addr_mask) == data_addr
+	void write_data( blip_time_t, int data );
+	
+public:
+	Nes_Fme7_Apu();
+	BLARGG_DISABLE_NOTHROW
+private:
+	// noncopyable
+	Nes_Fme7_Apu( const Nes_Fme7_Apu& );
+	Nes_Fme7_Apu& operator = ( const Nes_Fme7_Apu& );
+	
+	static unsigned char const amp_table [16]; // amplitude per 4-bit volume value
+	
+	struct {
+		Blip_Buffer* output; // null = oscillator is skipped by run_until()
+		int last_amp; // amplitude most recently added to the buffer
+	} oscs [osc_count];
+	blip_time_t last_time; // time that run_until() has emulated up to
+	
+	enum { amp_range = 192 }; // can be any value; this gives best error/quality tradeoff
+	Blip_Synth<blip_good_quality,1> synth; // shared by all three oscillators
+	
+	void run_until( blip_time_t );
+};
+
+inline void Nes_Fme7_Apu::volume( double v ) {
+	double const unit_gain = 0.38 / amp_range; // to do: fine-tune
+	synth.volume( unit_gain * v );
+}
+
+inline void Nes_Fme7_Apu::treble_eq( blip_eq_t const& eq ) {
+	// the oscillators share a single synth, so one EQ setting covers them all
+	synth.treble_eq( eq );
+}
+
+// Routes oscillator i to buf; i must be in 0..osc_count - 1
+inline void Nes_Fme7_Apu::osc_output( int i, Blip_Buffer* buf ) {
+	assert( 0 <= i && i < osc_count );
+	oscs [i].output = buf;
+}
+
+// Routes every oscillator to the same buffer
+inline void Nes_Fme7_Apu::output( Blip_Buffer* buf ) {
+	for ( int osc = osc_count; osc-- > 0; )
+		osc_output( osc, buf );
+}
+
+// Starts with cleared state, volume 1.0 and no output buffers attached
+inline Nes_Fme7_Apu::Nes_Fme7_Apu() {
+	reset();
+	volume( 1.0 );
+	output( NULL );
+}
+
+inline void Nes_Fme7_Apu::write_latch( int data ) { latch = uint8_t (data); } // selects the register for the next write_data()
+
+// Writes data to the register selected by latch; a latch past the sound registers is ignored
+inline void Nes_Fme7_Apu::write_data( blip_time_t time, int data )
+{
+	if ( latch < reg_count )
+	{
+		run_until( time ); // emit output under the old register value first
+		regs [latch] = data;
+		return;
+	}
+	#ifdef debug_printf
+		debug_printf( "FME7 write to %02X (past end of sound registers)\n", (int) latch );
+	#endif
+}
+
+// Runs emulation to time if needed, then makes last_time relative to the end of this frame
+inline void Nes_Fme7_Apu::end_frame( blip_time_t time )
+{
+	if ( last_time < time )
+		run_until( time ); // leaves last_time == time
+	assert( !(last_time < time) );
+	last_time = last_time - time;
+}
+
+inline void Nes_Fme7_Apu::save_state( fme7_apu_state_t* out ) const {
+	fme7_apu_state_t const& snapshot = *this; // view of just the state base
+	*out = snapshot;
+}
+
+// Replaces the chip state with in; reset() first clears last_time and the oscillator amplitudes
+inline void Nes_Fme7_Apu::load_state( fme7_apu_state_t const& in )
+{
+	reset();
+	static_cast<fme7_apu_state_t&>( *this ) = in;
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Nes_Namco_Apu.cpp b/libraries/game-music-emu/gme/Nes_Namco_Apu.cpp
new file mode 100644
index 000000000..3e5fc1491
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nes_Namco_Apu.cpp
@@ -0,0 +1,145 @@
+// Nes_Snd_Emu 0.1.8. http://www.slack.net/~ant/
+
+#include "Nes_Namco_Apu.h"
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+// Starts with cleared registers, volume 1.0 and no output buffers attached
+Nes_Namco_Apu::Nes_Namco_Apu() {
+	reset();
+	volume( 1.0 );
+	output( NULL );
+}
+
+// Zeroes the registers, address latch, time and every oscillator's delay,
+// amplitude and wave position; output buffer assignments are untouched
+void Nes_Namco_Apu::reset()
+{
+	for ( int r = 0; r < reg_count; r++ )
+		reg [r] = 0;
+	
+	for ( Namco_Osc* osc = oscs; osc != oscs + osc_count; osc++ )
+	{
+		osc->wave_pos = 0;
+		osc->last_amp = 0;
+		osc->delay = 0;
+	}
+	
+	addr_reg = 0;
+	last_time = 0;
+}
+
+// Routes every oscillator to the same buffer
+void Nes_Namco_Apu::output( Blip_Buffer* buf ) {
+	for ( int osc = osc_count; osc-- > 0; )
+		osc_output( osc, buf );
+}
+
+/*
+void Nes_Namco_Apu::reflect_state( Tagged_Data& data )
+{
+	reflect_int16( data, BLARGG_4CHAR('A','D','D','R'), &addr_reg );
+	
+	static const char hex [17] = "0123456789ABCDEF";
+	int i;
+	for ( i = 0; i < reg_count; i++ )
+		reflect_int16( data, 'RG\0\0' + hex [i >> 4] * 0x100 + hex [i & 15], &reg [i] );
+	
+	for ( i = 0; i < osc_count; i++ )
+	{
+		reflect_int32( data, BLARGG_4CHAR('D','L','Y','0') + i, &oscs [i].delay );
+		reflect_int16( data, BLARGG_4CHAR('P','O','S','0') + i, &oscs [i].wave_pos );
+	}
+}
+*/
+
+// Runs emulation to time if needed, then makes last_time relative to the end of this frame
+void Nes_Namco_Apu::end_frame( blip_time_t time )
+{
+	if ( last_time < time )
+		run_until( time ); // leaves last_time == time
+	assert( !(last_time < time) );
+	last_time = last_time - time;
+}
+
+// Runs the active oscillators from last_time up to nes_end_time, adding each
+// change in wave amplitude to that oscillator's buffer, then advances last_time
+void Nes_Namco_Apu::run_until( blip_time_t nes_end_time )
+{
+	int active_oscs = (reg [0x7F] >> 4 & 7) + 1; // 1..8; the highest-numbered oscillators are the ones run
+	for ( int i = osc_count - active_oscs; i < osc_count; i++ )
+	{
+		Namco_Osc& osc = oscs [i];
+		Blip_Buffer* output = osc.output;
+		if ( !output )
+			continue;
+		output->set_modified();
+		
+		blip_resampled_time_t time = output->resampled_time( last_time ) + osc.delay;
+		blip_resampled_time_t end_time = output->resampled_time( nes_end_time );
+		osc.delay = 0; // stays 0 if one of the checks below skips this oscillator
+		if ( time < end_time )
+		{
+			const uint8_t* osc_reg = &reg [i * 8 + 0x40]; // this oscillator's 8 control registers
+			if ( !(osc_reg [4] & 0xE0) )
+				continue;
+			
+			int volume = osc_reg [7] & 15;
+			if ( !volume )
+				continue;
+			
+			blargg_long freq = (osc_reg [4] & 3) * 0x10000 + osc_reg [2] * 0x100L + osc_reg [0];
+			if ( freq < 64 * active_oscs )
+				continue; // prevent low frequencies from excessively delaying freq changes
+			blip_resampled_time_t period = output->resampled_duration( 983040 ) / freq * active_oscs;
+			
+			int wave_size = 32 - (osc_reg [4] >> 2 & 7) * 4;
+			if ( !wave_size )
+				continue;
+			
+			int last_amp = osc.last_amp;
+			int wave_pos = osc.wave_pos;
+			
+			do
+			{
+				// read wave sample (reg holds 0x100 4-bit samples, two per byte)
+				int addr = (wave_pos + osc_reg [6]) & 0xFF; // wrap at 8 bits so addr >> 1 stays inside reg
+				int sample = reg [addr >> 1] >> (addr << 2 & 4);
+				wave_pos++;
+				sample = (sample & 15) * volume;
+				
+				// output impulse if amplitude changed
+				int delta = sample - last_amp;
+				if ( delta )
+				{
+					last_amp = sample;
+					synth.offset_resampled( time, delta, output );
+				}
+				
+				// next sample
+				time += period;
+				if ( wave_pos >= wave_size )
+					wave_pos = 0;
+			}
+			while ( time < end_time );
+			
+			osc.wave_pos = wave_pos;
+			osc.last_amp = last_amp;
+		}
+		osc.delay = time - end_time; // overshoot past end_time, carried into the next call
+	}
+	
+	last_time = nes_end_time;
+}
+
diff --git a/libraries/game-music-emu/gme/Nes_Namco_Apu.h b/libraries/game-music-emu/gme/Nes_Namco_Apu.h
new file mode 100644
index 000000000..876d85e0a
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nes_Namco_Apu.h
@@ -0,0 +1,102 @@
+// Namco 106 sound chip emulator
+
+// Nes_Snd_Emu 0.1.8
+#ifndef NES_NAMCO_APU_H
+#define NES_NAMCO_APU_H
+
+#include "blargg_common.h"
+#include "Blip_Buffer.h"
+
+struct namco_state_t;
+
+class Nes_Namco_Apu { // eight wavetable oscillators driven by a 0x80-byte register file
+public:
+	// See Nes_Apu.h for reference.
+	void volume( double );
+	void treble_eq( const blip_eq_t& );
+	void output( Blip_Buffer* ); // routes all oscillators to one buffer
+	enum { osc_count = 8 };
+	void osc_output( int index, Blip_Buffer* );
+	void reset();
+	void end_frame( blip_time_t );
+	
+	// Read/write data register is at 0x4800
+	enum { data_reg_addr = 0x4800 };
+	void write_data( blip_time_t, int );
+	int read_data();
+	
+	// Write-only address register is at 0xF800
+	enum { addr_reg_addr = 0xF800 };
+	void write_addr( int );
+	
+	// to do: implement save/restore
+	void save_state( namco_state_t* out ) const;
+	void load_state( namco_state_t const& );
+	
+public:
+	Nes_Namco_Apu();
+	BLARGG_DISABLE_NOTHROW
+private:
+	// noncopyable
+	Nes_Namco_Apu( const Nes_Namco_Apu& );
+	Nes_Namco_Apu& operator = ( const Nes_Namco_Apu& );
+	
+	struct Namco_Osc {
+		blargg_long delay; // resampled-time overshoot carried between run_until() calls
+		Blip_Buffer* output; // null = oscillator is skipped by run_until()
+		short last_amp; // amplitude most recently added to the buffer
+		short wave_pos; // index of the next wave sample, relative to the wave's start
+	};
+	
+	Namco_Osc oscs [osc_count];
+	
+	blip_time_t last_time; // time that run_until() has emulated up to
+	int addr_reg; // low 7 bits: register index; bit 7: advance the index after each access()
+	
+	enum { reg_count = 0x80 };
+	uint8_t reg [reg_count];
+	Blip_Synth<blip_good_quality,15> synth;
+	
+	uint8_t& access(); // register currently addressed by addr_reg
+	void run_until( blip_time_t );
+};
+/*
+struct namco_state_t
+{
+	uint8_t regs [0x80];
+	uint8_t addr;
+	uint8_t unused;
+	uint8_t positions [8];
+	uint32_t delays [8];
+};
+*/
+
+// Returns the register selected by the low 7 address bits; with bit 7 set, the address then advances by one
+inline uint8_t& Nes_Namco_Apu::access() {
+	uint8_t& selected = reg [addr_reg & 0x7F];
+	if ( (addr_reg & 0x80) != 0 )
+		addr_reg = ((addr_reg & 0x7F) + 1) | 0x80;
+	return selected;
+}
+
+inline void Nes_Namco_Apu::volume( double v ) { synth.volume( v * (0.10 / osc_count) ); } // 0.10 divided by osc_count, scaled by v
+
+inline void Nes_Namco_Apu::treble_eq( const blip_eq_t& eq ) { this->synth.treble_eq( eq ); } // one synth serves every oscillator
+
+inline void Nes_Namco_Apu::write_addr( int v ) { this->addr_reg = v; } // stored as written; access() masks it when used
+
+inline int Nes_Namco_Apu::read_data() { uint8_t const& value = access(); return value; } // access() may advance the address
+
+// Routes oscillator i to buf; i must be in 0..osc_count - 1
+inline void Nes_Namco_Apu::osc_output( int i, Blip_Buffer* buf ) {
+	assert( 0 <= i && i < osc_count );
+	oscs [i].output = buf;
+}
+
+// Brings emulation up to time, then stores data in the currently addressed register
+inline void Nes_Namco_Apu::write_data( blip_time_t time, int data ) {
+	run_until( time );
+	access() = uint8_t (data);
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Nes_Oscs.cpp b/libraries/game-music-emu/gme/Nes_Oscs.cpp
new file mode 100644
index 000000000..1ad3f59c0
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nes_Oscs.cpp
@@ -0,0 +1,551 @@
+// Nes_Snd_Emu 0.1.8. http://www.slack.net/~ant/
+
+#include "Nes_Apu.h"
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+// Nes_Osc
+
+// Counts the length counter down by one unless it is already 0 or regs [0] has a halt_mask bit set
+void Nes_Osc::clock_length( int halt_mask ) {
+	if ( length_counter != 0 && (regs [0] & halt_mask) == 0 )
+		--length_counter;
+}
+
+// Restarts the envelope after a register 3 write; otherwise steps the level when the divider expires
+void Nes_Envelope::clock_envelope() {
+	int const reload = regs [0] & 15;
+	if ( reg_written [3] ) {
+		reg_written [3] = false;
+		envelope = 15;
+		env_delay = reload;
+		return;
+	}
+	if ( --env_delay >= 0 ) return; // divider still running
+	env_delay = reload;
+	if ( envelope != 0 || (regs [0] & 0x20) != 0 ) // with bit 5 set the level wraps from 0 back to 15
+		envelope = (envelope - 1) & 15;
+}
+
+int Nes_Envelope::volume() const {
+	if ( length_counter == 0 ) return 0; // an expired length counter silences the channel
+	return (regs [0] & 0x10) ? (regs [0] & 15) : envelope; // bit 4 selects the fixed volume over the envelope level
+}
+
+// Nes_Square
+
+// Clocks the sweep unit: may shift the period when its divider expires, then reloads the divider if flagged
+void Nes_Square::clock_sweep( int negative_adjust )
+{
+	int const sweep = regs [1];
+	
+	sweep_delay--;
+	if ( sweep_delay < 0 )
+	{
+		reg_written [1] = true; // forces the divider reload below
+		int const old_period = this->period();
+		int const shift = sweep & shift_mask;
+		bool const enabled = (sweep & 0x80) != 0;
+		if ( shift != 0 && enabled && old_period >= 8 )
+		{
+			int change = old_period >> shift;
+			if ( sweep & negate_flag )
+				change = negative_adjust - change;
+			int const new_period = old_period + change;
+			if ( new_period < 0x800 )
+			{
+				// store the result back into the period registers
+				regs [2] = new_period & 0xFF;
+				regs [3] = (regs [3] & ~7) | ((new_period >> 8) & 7);
+			}
+		}
+	}
+	
+	if ( reg_written [1] ) {
+		reg_written [1] = false;
+		sweep_delay = (sweep >> 4) & 7;
+	}
+}
+
+// TODO: clean up
+inline nes_time_t Nes_Square::maintain_phase( nes_time_t time, nes_time_t end_time,
+		nes_time_t timer_period )
+{
+	nes_time_t const remain = end_time - time;
+	if ( remain <= 0 )
+		return time; // already at or past end_time; phase is left alone
+	
+	// whole timer periods needed to reach or pass end_time (ceiling division)
+	int const steps = (remain + timer_period - 1) / timer_period;
+	phase = (phase + steps) & (phase_range - 1);
+	return time + (blargg_long) steps * timer_period;
+}
+
+void Nes_Square::run( nes_time_t time, nes_time_t end_time ) // emits the square wave from time up to end_time
+{
+	const int period = this->period();
+	const int timer_period = (period + 1) * 2; // clocks between duty-sequence steps
+	
+	if ( !output )
+	{
+		delay = maintain_phase( time + delay, end_time, timer_period ) - end_time; // no buffer: only keep phase and delay current
+		return;
+	}
+	
+	output->set_modified();
+	
+	int offset = period >> (regs [1] & shift_mask);
+	if ( regs [1] & negate_flag )
+		offset = 0;
+	
+	const int volume = this->volume();
+	if ( volume == 0 || period < 8 || (period + offset) >= 0x800 )
+	{
+		// silent: drop the output to 0 and just keep the phase advancing
+		if ( last_amp ) {
+			synth.offset( time, -last_amp, output );
+			last_amp = 0;
+		}
+		
+		time += delay;
+		time = maintain_phase( time, end_time, timer_period );
+	}
+	else
+	{
+		// handle duty select
+		int duty_select = (regs [0] >> 6) & 3;
+		int duty = 1 << duty_select; // 1, 2, 4, 2
+		int amp = 0;
+		if ( duty_select == 3 ) {
+			duty = 2; // negated 25%
+			amp = volume;
+		}
+		if ( phase < duty )
+			amp ^= volume; // toggles between 0 and volume
+		
+		{
+			int delta = update_amp( amp );
+			if ( delta )
+				synth.offset( time, delta, output );
+		}
+		
+		time += delay;
+		if ( time < end_time )
+		{
+			// local copies of members used inside the loop
+			Blip_Buffer* const output = this->output;
+			const Synth& synth = this->synth;
+			int delta = amp * 2 - volume; // +volume when currently high, -volume when low
+			int phase = this->phase;
+			
+			do {
+				phase = (phase + 1) & (phase_range - 1);
+				if ( phase == 0 || phase == duty ) {
+					delta = -delta; // output toggles at these two phases
+					synth.offset_inline( time, delta, output );
+				}
+				time += timer_period;
+			}
+			while ( time < end_time );
+			
+			last_amp = (delta + volume) >> 1; // maps +volume -> volume, -volume -> 0
+			this->phase = phase;
+		}
+	}
+	
+	delay = time - end_time; // overshoot past end_time, carried into the next call
+}
+
+// Nes_Triangle
+
+// Reloads the linear counter while the register 3 write flag is set, otherwise
+// counts it down to 0
+void Nes_Triangle::clock_linear_counter() {
+	if ( reg_written [3] )
+		linear_counter = regs [0] & 0x7F;
+	else if ( linear_counter != 0 )
+		--linear_counter;
+	// the write flag survives only while bit 7 of regs [0] is set
+	reg_written [3] = reg_written [3] && (regs [0] & 0x80) != 0;
+}
+
+// Amplitude for the current phase: phase_range - phase, mirrored once that difference goes negative
+inline int Nes_Triangle::calc_amp() const {
+	int const falling = phase_range - phase;
+	if ( falling >= 0 )
+		return falling;
+	return phase - (phase_range + 1); // second half of the wave
+}
+
+// TODO: clean up
+inline nes_time_t Nes_Triangle::maintain_phase( nes_time_t time, nes_time_t end_time,
+		nes_time_t timer_period )
+{
+	nes_time_t const remain = end_time - time;
+	if ( remain <= 0 )
+		return time; // already at or past end_time; phase is left alone
+	
+	// whole timer periods needed to reach or pass end_time (ceiling division)
+	int const steps = (remain + timer_period - 1) / timer_period;
+	// step the phase back by that many periods; the result lies in 1..phase_range * 2
+	phase = (((unsigned) phase + 1 - steps) & (phase_range * 2 - 1)) + 1;
+	return time + (blargg_long) steps * timer_period;
+}
+
+void Nes_Triangle::run( nes_time_t time, nes_time_t end_time ) // emits the triangle wave from time up to end_time
+{
+	const int timer_period = period() + 1;
+	if ( !output )
+	{
+		// no buffer: only keep the phase and delay current, and only while the channel is running
+		time += delay;
+		delay = 0;
+		if ( length_counter && linear_counter && timer_period >= 3 )
+			delay = maintain_phase( time, end_time, timer_period ) - end_time;
+		return;
+	}
+	
+	output->set_modified();
+	
+	// to do: track phase when period < 3
+	// to do: Output 7.5 on dac when period < 2? More accurate, but results in more clicks.
+	
+	int delta = update_amp( calc_amp() );
+	if ( delta )
+		synth.offset( time, delta, output );
+	
+	time += delay;
+	if ( length_counter == 0 || linear_counter == 0 || timer_period < 3 )
+	{
+		time = end_time; // halted: output holds its level and delay becomes 0
+	}
+	else if ( time < end_time )
+	{
+		Blip_Buffer* const output = this->output;
+		
+		// split the phase into a 1..phase_range counter plus a direction (volume = +1 or -1)
+		int phase = this->phase;
+		int volume = 1;
+		if ( phase > phase_range ) {
+			phase -= phase_range;
+			volume = -volume;
+		}
+		
+		do {
+			if ( --phase == 0 ) {
+				phase = phase_range; // end of this half: reverse direction, no step emitted
+				volume = -volume;
+			}
+			else {
+				synth.offset_inline( time, volume, output );
+			}
+			
+			time += timer_period;
+		}
+		while ( time < end_time );
+		
+		if ( volume < 0 )
+			phase += phase_range; // fold the direction back into the stored phase
+		this->phase = phase;
+		last_amp = calc_amp();
+ 	}
+	delay = time - end_time; // overshoot past end_time, carried into the next call
+}
+
+// Nes_Dmc
+
+// Returns the DMC to an idle state: empty sample buffer, silent output, no IRQ pending or enabled
+void Nes_Dmc::reset()
+{
+	Nes_Osc::reset();
+	period = 0x1AC; // 428, the first NTSC entry of dmc_period_table
+	address = 0;
+	buf = 0;
+	buf_full = false;
+	bits = 0;
+	bits_remain = 1;
+	dac = 0;
+	silence = true;
+	irq_enabled = false;
+	irq_flag = false;
+	next_irq = Nes_Apu::no_irq;
+}
+
+// Recomputes the time of the next DMC IRQ and notifies the APU only if that time changed
+void Nes_Dmc::recalc_irq() {
+	nes_time_t when = Nes_Apu::no_irq;
+	if ( irq_enabled && length_counter ) {
+		nes_time_t const bits_left = (length_counter - 1) * 8 + bits_remain - 1;
+		when = apu->last_dmc_time + delay + bits_left * nes_time_t (period) + 1;
+	}
+	if ( when == next_irq ) return; // unchanged: no notification
+	next_irq = when;
+	apu->irq_changed();
+}
+
+// Returns the number of sample reads counted up to time; *last_read (if given) gets the earliest time with that count
+int Nes_Dmc::count_reads( nes_time_t time, nes_time_t* last_read ) const
+{
+	if ( last_read )
+		*last_read = time; // value reported when no reads are counted
+	if ( !length_counter )
+		return 0; // not reading
+	
+	nes_time_t const first = next_read_time();
+	nes_time_t const avail = time - first;
+	if ( avail <= 0 )
+		return 0;
+	
+	int const byte_period = period * 8; // clocks between successive reads
+	int reads = (avail - 1) / byte_period + 1;
+	if ( (regs [0] & loop_flag) == 0 && reads > length_counter )
+		reads = length_counter; // without looping, reads stop when the sample runs out
+	
+	if ( last_read )
+	{
+		*last_read = first + (reads - 1) * byte_period + 1;
+		check( *last_read <= time );
+		check( reads == count_reads( *last_read, NULL ) );
+		check( reads - 1 == count_reads( *last_read - 1, NULL ) );
+	}
+	return reads;
+}
+
+static short const dmc_period_table [2] [16] = { // DMC period per 4-bit rate value; the row is chosen by pal_mode
+	{428, 380, 340, 320, 286, 254, 226, 214, // NTSC
+	190, 160, 142, 128, 106,  84,  72,  54},
+
+	{398, 354, 316, 298, 276, 236, 210, 198, // PAL
+	176, 148, 132, 118,  98,  78,  66,  50}
+};
+
+// Restarts the sample from the address and length held in registers 2 and 3
+inline void Nes_Dmc::reload_sample() {
+	length_counter = (regs [3] << 4) + 1; // length register counts 16-byte units, plus one byte
+	address = 0x4000 + (regs [2] << 6);   // address register counts 64-byte units
+}
+
+static byte const dac_table [128] = // non-linear level for each 7-bit dac value; used by write_register()
+{
+	 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14, // steps shrink as the index grows (compressive curve)
+	15,15,16,17,18,19,20,20,21,22,23,24,24,25,26,27,
+	27,28,29,30,31,31,32,33,33,34,35,36,36,37,38,38,
+	39,40,41,41,42,43,43,44,45,45,46,47,47,48,48,49,
+	50,50,51,52,52,53,53,54,55,55,56,56,57,58,58,59,
+	59,60,60,61,61,62,63,63,64,64,65,65,66,66,67,67,
+	68,68,69,70,70,71,71,72,72,73,73,74,74,75,75,75,
+	76,76,77,77,78,78,79,79,80,80,81,81,82,82,82,83,
+};
+
+// Handles writes to DMC register 0 (period and IRQ enable) and register 1 (dac value); other addr values do nothing here
+void Nes_Dmc::write_register( int addr, int data )
+{
+	switch ( addr ) {
+	case 0:
+		period = dmc_period_table [pal_mode] [data & 15];
+		irq_enabled = (data & 0xC0) == 0x80; // enabled only if loop disabled
+		irq_flag &= irq_enabled;
+		recalc_irq();
+		break;
+	case 1: {
+		int const prev_dac = dac;
+		dac = data & 0x7F;
+		// adjust last_amp so that "pop" amplitude will be properly non-linear
+		// with respect to change in dac
+		if ( !nonlinear )
+			last_amp = dac - (dac_table [dac] - dac_table [prev_dac]);
+		break;
+	}
+	}
+}
+
+void Nes_Dmc::start() // begins playback; the three steps depend on each other, so their order matters
+{
+	reload_sample(); // sets address and length_counter from registers 2 and 3
+	fill_buffer();   // fetches the first byte if the buffer is empty (may decrement length_counter)
+	recalc_irq();    // IRQ time is derived from the length_counter left after that fetch
+}
+
+// Fetches the next sample byte into buf when the buffer is empty and bytes remain; handles end of sample
+void Nes_Dmc::fill_buffer()
+{
+	if ( buf_full || !length_counter )
+		return;
+	require( prg_reader ); // prg_reader must be set
+	buf = prg_reader( prg_reader_data, 0x8000u + address );
+	buf_full = true;
+	address = (address + 1) & 0x7FFF;
+	
+	if ( --length_counter != 0 )
+		return;
+	if ( regs [0] & loop_flag ) {
+		reload_sample();
+		return;
+	}
+	// sample finished without looping: clear bit 0x10 of osc_enables and set the IRQ flag if enabled
+	apu->osc_enables &= ~0x10;
+	irq_flag = irq_enabled;
+	next_irq = Nes_Apu::no_irq;
+	apu->irq_changed();
+}
+
+void Nes_Dmc::run( nes_time_t time, nes_time_t end_time ) // plays delta-coded sample bits from time up to end_time
+{
+	int delta = update_amp( dac );
+	if ( !output )
+	{
+		silence = true; // without a buffer no dac steps are applied
+	}
+	else
+	{
+		output->set_modified();
+		if ( delta )
+			synth.offset( time, delta, output );
+	}
+	
+	time += delay;
+	if ( time < end_time )
+	{
+		int bits_remain = this->bits_remain;
+		if ( silence && !buf_full )
+		{
+			// nothing to play and nothing buffered: skip ahead, keeping bits_remain in 1..8
+			int count = (end_time - time + period - 1) / period;
+			bits_remain = (bits_remain - 1 + 8 - (count % 8)) % 8 + 1;
+			time += count * period;
+		}
+		else
+		{
+			// local copies of members used inside the loop
+			Blip_Buffer* const output = this->output;
+			const int period = this->period;
+			int bits = this->bits;
+			int dac = this->dac;
+			
+			do
+			{
+				if ( !silence )
+				{
+					int step = (bits & 1) * 4 - 2; // +2 for a 1 bit, -2 for a 0 bit
+					bits >>= 1;
+					if ( unsigned (dac + step) <= 0x7F ) { // a step that would leave 0..0x7F is dropped
+						dac += step;
+						synth.offset_inline( time, step, output );
+					}
+				}
+				
+				time += period;
+				
+				if ( --bits_remain == 0 )
+				{
+					// all 8 bits consumed: take the buffered byte, or go silent if there is none
+					bits_remain = 8;
+					if ( !buf_full ) {
+						silence = true;
+					}
+					else {
+						silence = false;
+						bits = buf;
+						buf_full = false;
+						if ( !output )
+							silence = true;
+						fill_buffer();
+					}
+				}
+			}
+			while ( time < end_time );
+			
+			this->dac = dac;
+			this->last_amp = dac;
+			this->bits = bits;
+		}
+		this->bits_remain = bits_remain;
+	}
+	delay = time - end_time; // overshoot past end_time, carried into the next call
+}
+
+// Nes_Noise
+
+static short const noise_period_table [16] = { // noise period per 4-bit value; Nes_Noise::run indexes it with regs [2] & 15
+	0x004, 0x008, 0x010, 0x020, 0x040, 0x060, 0x080, 0x0A0,
+	0x0CA, 0x0FE, 0x17C, 0x1FC, 0x2FA, 0x3F8, 0x7F2, 0xFE4
+};
+
+void Nes_Noise::run( nes_time_t time, nes_time_t end_time ) // emits shift-register noise from time up to end_time
+{
+	int period = noise_period_table [regs [2] & 15];
+	
+	if ( !output )
+	{
+		// TODO: clean up
+		time += delay;
+		delay = time + (end_time - time + period - 1) / period * period - end_time; // no buffer: only the delay is kept current
+		return;
+	}
+	
+	output->set_modified();
+	
+	const int volume = this->volume();
+	int amp = (noise & 1) ? volume : 0; // output is on while bit 0 of the shift register is set
+	{
+		int delta = update_amp( amp );
+		if ( delta )
+			synth.offset( time, delta, output );
+	}
+	
+	time += delay;
+	if ( time < end_time )
+	{
+		const int mode_flag = 0x80; // bit of regs [2] that selects the other feedback tap
+		
+		if ( !volume )
+		{
+			// round to next multiple of period
+			time += (end_time - time + period - 1) / period * period;
+			
+			// approximate noise cycling while muted, by shuffling up noise register
+			// to do: precise muted noise cycling?
+			if ( !(regs [2] & mode_flag) ) {
+				int feedback = (noise << 13) ^ (noise << 14);
+				noise = (feedback & 0x4000) | (noise >> 1);
+			}
+		}
+		else
+		{
+			Blip_Buffer* const output = this->output;
+			
+			// using resampled time avoids conversion in synth.offset()
+			blip_resampled_time_t rperiod = output->resampled_duration( period );
+			blip_resampled_time_t rtime = output->resampled_time( time );
+			
+			int noise = this->noise;
+			int delta = amp * 2 - volume; // +volume when currently on, -volume when off
+			const int tap = (regs [2] & mode_flag ? 8 : 13);
+			
+			do {
+				int feedback = (noise << tap) ^ (noise << 14); // bit 14 = XOR of register bit 0 and bit (14 - tap)
+				time += period;
+				
+				if ( (noise + 1) & 2 ) {
+					// bits 0 and 1 of noise differ
+					delta = -delta;
+					synth.offset_resampled( rtime, delta, output );
+				}
+				
+				rtime += rperiod;
+				noise = (feedback & 0x4000) | (noise >> 1); // shift right, feeding the new bit in at bit 14
+			}
+			while ( time < end_time );
+			
+			last_amp = (delta + volume) >> 1; // maps +volume -> volume, -volume -> 0
+			this->noise = noise;
+		}
+	}
+	
+	delay = time - end_time; // overshoot past end_time, carried into the next call
+}
+
diff --git a/libraries/game-music-emu/gme/Nes_Oscs.h b/libraries/game-music-emu/gme/Nes_Oscs.h
new file mode 100644
index 000000000..b675bfb47
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nes_Oscs.h
@@ -0,0 +1,147 @@
+// Private oscillators used by Nes_Apu
+
+// Nes_Snd_Emu 0.1.8
+#ifndef NES_OSCS_H
+#define NES_OSCS_H
+
+#include "blargg_common.h"
+#include "Blip_Buffer.h"
+
+class Nes_Apu;
+
+struct Nes_Osc
+{
+	unsigned char regs [4]; // register values; period() reads regs [2] and regs [3]
+	bool reg_written [4]; // NOTE(review): presumably per-register write flags; confirm in Nes_Apu
+	Blip_Buffer* output; // buffer this oscillator renders into
+	int length_counter;// length counter (0 if unused by oscillator)
+	int delay;      // delay until next (potential) transition
+	int last_amp;   // last amplitude oscillator was outputting
+	
+	void clock_length( int halt_mask );
+	int period() const { // 11-bit value: low 3 bits of regs [3] above the 8 bits of regs [2]
+		return (regs [3] & 7) * 0x100 + (regs [2] & 0xFF);
+	}
+	void reset() { // clears timing and amplitude state only; registers are left alone
+		delay = 0;
+		last_amp = 0;
+	}
+	int update_amp( int amp ) { // stores amp as last_amp and returns the change from the old value
+		int delta = amp - last_amp;
+		last_amp = amp;
+		return delta;
+	}
+};
+
+struct Nes_Envelope : Nes_Osc
+{
+	int envelope; // zeroed by reset()
+	int env_delay; // zeroed by reset()
+	
+	void clock_envelope();
+	int volume() const; // current output level; Nes_Noise::run treats 0 as muted
+	void reset() { // clears the envelope fields, then the base oscillator state
+		envelope = 0;
+		env_delay = 0;
+		Nes_Osc::reset();
+	}
+};
+
+// Nes_Square: envelope-driven oscillator with sweep state and a synth shared between squares
+struct Nes_Square : Nes_Envelope
+{
+	enum { negate_flag = 0x08 };
+	enum { shift_mask = 0x07 };
+	enum { phase_range = 8 };
+	int phase;
+	int sweep_delay;
+	
+	typedef Blip_Synth<blip_good_quality,1> Synth;
+	Synth const& synth; // shared between squares
+	
+	Nes_Square( Synth const* s ) : synth( *s ) { } // s is dereferenced here, so it must not be null
+	
+	void clock_sweep( int adjust );
+	void run( nes_time_t, nes_time_t );
+	void reset() { // note: phase is not touched here
+		sweep_delay = 0;
+		Nes_Envelope::reset();
+	}
+	nes_time_t maintain_phase( nes_time_t time, nes_time_t end_time,
+			nes_time_t timer_period );
+};
+
+// Nes_Triangle: oscillator with a linear counter and its own medium-quality synth
+struct Nes_Triangle : Nes_Osc
+{
+	enum { phase_range = 16 };
+	int phase;
+	int linear_counter;
+	Blip_Synth<blip_med_quality,1> synth;
+	
+	int calc_amp() const;
+	void run( nes_time_t, nes_time_t );
+	void clock_linear_counter();
+	void reset() { // phase restarts at 1, not 0
+		linear_counter = 0;
+		phase = 1;
+		Nes_Osc::reset();
+	}
+	nes_time_t maintain_phase( nes_time_t time, nes_time_t end_time,
+			nes_time_t timer_period );
+};
+
+// Nes_Noise: envelope-driven oscillator whose output follows bit 0 of a shift register
+struct Nes_Noise : Nes_Envelope
+{
+	int noise; // shift register state; run() only ever sets bits 0-14
+	Blip_Synth<blip_med_quality,1> synth;
+	
+	void run( nes_time_t, nes_time_t );
+	void reset() {
+		noise = 1 << 14; // start with only bit 14 set
+		Nes_Envelope::reset();
+	}
+};
+
+// Nes_Dmc: oscillator that reads its data through the prg_reader callback
+struct Nes_Dmc : Nes_Osc
+{
+	int address;    // address of next byte to read
+	int period;
+	//int length_counter; // bytes remaining to play (already defined in Nes_Osc)
+	int buf;
+	int bits_remain;
+	int bits;
+	bool buf_full;
+	bool silence;
+	
+	enum { loop_flag = 0x40 };
+	
+	int dac;
+	
+	nes_time_t next_irq;
+	bool irq_enabled;
+	bool irq_flag;
+	bool pal_mode;
+	bool nonlinear;
+	
+	int (*prg_reader)( void*, nes_addr_t ); // needs to be initialized to prg read function
+	void* prg_reader_data; // NOTE(review): presumably passed as prg_reader's first argument; confirm
+	
+	Nes_Apu* apu;
+	
+	Blip_Synth<blip_med_quality,1> synth;
+	
+	void start();
+	void write_register( int, int );
+	void run( nes_time_t, nes_time_t );
+	void recalc_irq();
+	void fill_buffer();
+	void reload_sample();
+	void reset(); // defined out of line, unlike the other oscillators' reset()
+	int count_reads( nes_time_t, nes_time_t* ) const;
+	nes_time_t next_read_time() const;
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Nes_Vrc6_Apu.cpp b/libraries/game-music-emu/gme/Nes_Vrc6_Apu.cpp
new file mode 100644
index 000000000..d178407c3
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nes_Vrc6_Apu.cpp
@@ -0,0 +1,215 @@
+// Nes_Snd_Emu 0.1.8. http://www.slack.net/~ant/
+
+#include "Nes_Vrc6_Apu.h"
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+Nes_Vrc6_Apu::Nes_Vrc6_Apu()
+{
+	this->output( NULL ); // no buffer attached until the owner provides one
+	this->volume( 1.0 );  // unity gain
+	this->reset();        // clear registers and timing state
+}
+
+// Clears every oscillator's registers and timing state; output buffers are kept.
+void Nes_Vrc6_Apu::reset()
+{
+	last_time = 0;
+	for ( Vrc6_Osc* osc = oscs; osc != oscs + osc_count; ++osc )
+	{
+		for ( int r = 0; r < reg_count; ++r )
+			osc->regs [r] = 0;
+		osc->delay    = 0;
+		osc->last_amp = 0;
+		osc->phase    = 1; // phase restarts at 1, not 0
+		osc->amp      = 0;
+	}
+}
+
+void Nes_Vrc6_Apu::output( Blip_Buffer* buf )
+{
+	for ( int index = osc_count; index-- > 0; ) // same buffer for every oscillator
+		osc_output( index, buf );
+}
+
+void Nes_Vrc6_Apu::run_until( blip_time_t time )
+{
+	require( time >= last_time ); // time may not move backwards
+	for ( int sq = 0; sq < 2; ++sq ) // oscs 0 and 1 are the squares
+		run_square( oscs [sq], time );
+	run_saw( time ); // osc 2 is the saw
+	last_time = time;
+}
+
+void Nes_Vrc6_Apu::write_osc( blip_time_t time, int osc_index, int reg, int data )
+{
+	require( (unsigned) osc_index < osc_count );
+	require( (unsigned) reg < reg_count );
+	run_until( time ); // render with the old value before the new one takes effect
+	Vrc6_Osc& target = oscs [osc_index];
+	target.regs [reg] = data;
+}
+
+// Finishes the frame ending at time and rebases last_time relative to the next frame.
+void Nes_Vrc6_Apu::end_frame( blip_time_t time )
+{
+	if ( last_time < time )
+		run_until( time ); // render whatever is still pending
+	assert( last_time >= time );
+	last_time -= time;
+}
+
+void Nes_Vrc6_Apu::save_state( vrc6_apu_state_t* out ) const
+{	// Copies registers, delays, phases and the saw accumulator into *out.
+	assert( sizeof (vrc6_apu_state_t) == 20 );
+	out->saw_amp = oscs [2].amp;
+	out->unused  = 0; // was left unwritten; keeps the saved bytes deterministic
+	for ( int i = 0; i < osc_count; i++ )
+	{
+		Vrc6_Osc const& osc = oscs [i];
+		for ( int r = 0; r < reg_count; r++ )
+			out->regs [i] [r] = osc.regs [r];
+		out->delays [i] = osc.delay; // stored as uint16_t
+		out->phases [i] = osc.phase; // stored as uint8_t
+	}
+}
+
+// Resets the chip, then restores registers, delays, phases and the saw accumulator from in.
+void Nes_Vrc6_Apu::load_state( vrc6_apu_state_t const& in )
+{
+	reset();
+	for ( int i = 0; i < osc_count; i++ )
+	{
+		Vrc6_Osc& dest = oscs [i];
+		for ( int r = 0; r < reg_count; r++ )
+			dest.regs [r] = in.regs [i] [r];
+		dest.delay = in.delays [i];
+		dest.phase = in.phases [i];
+	}
+	oscs [2].amp = in.saw_amp;
+	if ( oscs [2].phase == 0 ) // a zero saw phase is replaced with 1
+		oscs [2].phase = 1;
+}
+
+void Nes_Vrc6_Apu::run_square( Vrc6_Osc& osc, blip_time_t end_time )
+{ // Renders one square oscillator from last_time up to end_time
+	Blip_Buffer* output = osc.output;
+	if ( !output )
+		return;
+	output->set_modified();
+	
+	int volume = osc.regs [0] & 15;
+	if ( !(osc.regs [2] & 0x80) ) // bit 7 of regs [2] clear: oscillator is silent
+		volume = 0;
+	
+	int gate = osc.regs [0] & 0x80; // when set, output stays at volume regardless of phase
+	int duty = ((osc.regs [0] >> 4) & 7) + 1; // 1..8 high steps out of the 16-step cycle
+	int delta = ((gate || osc.phase < duty) ? volume : 0) - osc.last_amp;
+	blip_time_t time = last_time;
+	if ( delta )
+	{
+		osc.last_amp += delta;
+		square_synth.offset( time, delta, output );
+	}
+	
+	time += osc.delay;
+	osc.delay = 0;
+	int period = osc.period();
+	if ( volume && !gate && period > 4 ) // periods of 4 or less are not rendered
+	{
+		if ( time < end_time )
+		{
+			int phase = osc.phase;
+			
+			do
+			{
+				phase++;
+				if ( phase == 16 ) // cycle wraps: output goes high
+				{
+					phase = 0;
+					osc.last_amp = volume;
+					square_synth.offset( time, volume, output );
+				}
+				if ( phase == duty ) // end of the high part: output goes low
+				{
+					osc.last_amp = 0;
+					square_synth.offset( time, -volume, output );
+				}
+				time += period;
+			}
+			while ( time < end_time );
+			
+			osc.phase = phase;
+		}
+		osc.delay = time - end_time; // overshoot carried to the next call
+	}
+}
+
+void Nes_Vrc6_Apu::run_saw( blip_time_t end_time )
+{	// Renders the saw oscillator (oscs [2]) from last_time up to end_time.
+	Vrc6_Osc& osc = oscs [2];
+	Blip_Buffer* output = osc.output;
+	if ( !output )
+		return;
+	output->set_modified();
+	
+	int amp = osc.amp;
+	int amp_step = osc.regs [0] & 0x3F;
+	blip_time_t time = last_time;
+	int last_amp = osc.last_amp;
+	if ( !(osc.regs [2] & 0x80) || !(amp_step | amp) )
+	{
+		// disabled, or accumulator and step both zero: hold the current level
+		osc.delay = 0;
+		int delta = (amp >> 3) - last_amp;
+		last_amp = amp >> 3;
+		if ( delta ) // no impulse needed when the level is unchanged
+			saw_synth.offset( time, delta, output );
+	}
+	else
+	{
+		time += osc.delay;
+		if ( time < end_time )
+		{
+			int period = osc.period() * 2;
+			int phase = osc.phase;
+			do
+			{
+				if ( --phase == 0 ) // every 7th step restarts the ramp
+				{
+					phase = 7;
+					amp = 0;
+				}
+				
+				int delta = (amp >> 3) - last_amp; // output is the top 5 bits of the accumulator
+				if ( delta )
+				{
+					last_amp = amp >> 3;
+					saw_synth.offset( time, delta, output );
+				}
+				
+				time += period;
+				amp = (amp + amp_step) & 0xFF;
+			}
+			while ( time < end_time );
+			
+			osc.phase = phase;
+			osc.amp = amp;
+		}
+		osc.delay = time - end_time; // overshoot carried to the next call
+	}
+	
+	osc.last_amp = last_amp;
+}
+
diff --git a/libraries/game-music-emu/gme/Nes_Vrc6_Apu.h b/libraries/game-music-emu/gme/Nes_Vrc6_Apu.h
new file mode 100644
index 000000000..23a6519fc
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nes_Vrc6_Apu.h
@@ -0,0 +1,95 @@
+// Konami VRC6 sound chip emulator
+
+// Nes_Snd_Emu 0.1.8
+#ifndef NES_VRC6_APU_H
+#define NES_VRC6_APU_H
+
+#include "blargg_common.h"
+#include "Blip_Buffer.h"
+
+struct vrc6_apu_state_t;
+
+class Nes_Vrc6_Apu {
+public:
+	// See Nes_Apu.h for reference
+	void reset();
+	void volume( double );
+	void treble_eq( blip_eq_t const& );
+	void output( Blip_Buffer* ); // same buffer for all oscillators
+	enum { osc_count = 3 };
+	void osc_output( int index, Blip_Buffer* ); // buffer for a single oscillator
+	void end_frame( blip_time_t );
+	void save_state( vrc6_apu_state_t* ) const;
+	void load_state( vrc6_apu_state_t const& );
+	
+	// Oscillator 0 write-only registers are at $9000-$9002
+	// Oscillator 1 write-only registers are at $A000-$A002
+	// Oscillator 2 write-only registers are at $B000-$B002
+	enum { reg_count = 3 };
+	enum { base_addr = 0x9000 };
+	enum { addr_step = 0x1000 };
+	void write_osc( blip_time_t, int osc, int reg, int data ); // requires osc < osc_count, reg < reg_count
+	
+public:
+	Nes_Vrc6_Apu();
+	BLARGG_DISABLE_NOTHROW
+private:
+	// noncopyable
+	Nes_Vrc6_Apu( const Nes_Vrc6_Apu& );
+	Nes_Vrc6_Apu& operator = ( const Nes_Vrc6_Apu& );
+	
+	struct Vrc6_Osc
+	{
+		uint8_t regs [3];
+		Blip_Buffer* output;
+		int delay; // clocks past the end of the last run until the next step
+		int last_amp; // amplitude most recently written to the buffer
+		int phase;
+		int amp; // only used by saw
+		
+		int period() const // 12-bit register value plus one
+		{
+			return (regs [2] & 0x0F) * 0x100L + regs [1] + 1;
+		}
+	};
+	
+	Vrc6_Osc oscs [osc_count]; // 0 and 1 are squares, 2 is the saw
+	blip_time_t last_time; // time the oscillators have been run up to
+	
+	Blip_Synth<blip_med_quality,1> saw_synth;
+	Blip_Synth<blip_good_quality,1> square_synth; // used for both squares
+	
+	void run_until( blip_time_t );
+	void run_square( Vrc6_Osc& osc, blip_time_t );
+	void run_saw( blip_time_t );
+};
+
+struct vrc6_apu_state_t
+{ // snapshot filled by save_state(), which asserts the size is 20 bytes
+	uint8_t regs [3] [3]; // [oscillator] [register]
+	uint8_t saw_amp;
+	uint16_t delays [3];
+	uint8_t phases [3];
+	uint8_t unused;
+};
+
+inline void Nes_Vrc6_Apu::osc_output( int i, Blip_Buffer* buf )
+{ // Sets the buffer oscillator i renders into (run_square/run_saw skip a NULL buffer)
+	assert( (unsigned) i < osc_count ); // unsigned compare also rejects negative i
+	oscs [i].output = buf;
+}
+
+inline void Nes_Vrc6_Apu::volume( double v )
+{
+	double const full_scale = 0.0967 * 2;
+	square_synth.volume( full_scale * 0.5 / 15 * v ); // square amplitude is at most 15
+	saw_synth.volume( full_scale / 31 * v );          // saw amplitude (amp >> 3) is at most 31
+}
+
+inline void Nes_Vrc6_Apu::treble_eq( blip_eq_t const& eq )
+{
+	square_synth.treble_eq( eq ); // both synths get the same equalization
+	saw_synth.treble_eq( eq );
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Nsf_Emu.cpp b/libraries/game-music-emu/gme/Nsf_Emu.cpp
new file mode 100644
index 000000000..74d76850e
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nsf_Emu.cpp
@@ -0,0 +1,561 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Nsf_Emu.h"
+
+#include "blargg_endian.h"
+#include <string.h>
+#include <stdio.h>
+
+#if !NSF_EMU_APU_ONLY
+	#include "Nes_Namco_Apu.h"
+	#include "Nes_Vrc6_Apu.h"
+	#include "Nes_Fme7_Apu.h"
+#endif
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+int const vrc6_flag  = 0x01;
+int const namco_flag = 0x10;
+int const fme7_flag  = 0x20;
+
+long const clock_divisor = 12;
+
+Nsf_Emu::equalizer_t const Nsf_Emu::nes_eq     =
+	Music_Emu::make_equalizer( -1.0, 80 );
+Nsf_Emu::equalizer_t const Nsf_Emu::famicom_eq =
+	Music_Emu::make_equalizer( -15.0, 80 );
+
+int Nsf_Emu::pcm_read( void* emu, nes_addr_t addr )
+{	// Read callback given to apu.dmc_reader(); emu is the owning Nsf_Emu
+	return *static_cast<Nsf_Emu*> (emu)->cpu::get_code( addr );
+}
+
+Nsf_Emu::Nsf_Emu()
+{
+	// expansion chips are allocated later by init_sound()
+	namco = NULL;
+	vrc6  = NULL;
+	fme7  = NULL;
+	set_type( gme_nsf_type );
+	set_silence_lookahead( 6 );
+	apu.dmc_reader( pcm_read, this ); // the DMC reads its data through the CPU code map
+	Music_Emu::set_equalizer( nes_eq );
+	set_gain( 1.4 );
+	memset( unmapped_code, Nes_Cpu::bad_opcode, sizeof (unmapped_code) );
+}
+
+Nsf_Emu::~Nsf_Emu() { unload(); } // frees expansion chips and ROM data
+
+void Nsf_Emu::unload()
+{
+	#if !NSF_EMU_APU_ONLY
+	{
+		// deleting a null pointer is a no-op, so no checks are needed
+		delete namco;
+		namco = NULL;
+		delete vrc6;
+		vrc6  = NULL;
+		delete fme7;
+		fme7  = NULL;
+	}
+	#endif
+	
+	// drop the ROM image, then run the base-class unload
+	rom.clear();
+	Music_Emu::unload();
+}
+
+// Track info
+
+static void copy_nsf_fields( Nsf_Emu::header_t const& h, track_info_t* out )
+{	// Copies the text fields of header h into *out
+	GME_COPY_FIELD( h, out, game );
+	GME_COPY_FIELD( h, out, author );
+	GME_COPY_FIELD( h, out, copyright );
+	if ( h.chip_flags != 0 ) // any expansion chip flag labels the system "Famicom"
+		Gme_File::copy_field_( out->system, "Famicom" );
+}
+
+blargg_err_t Nsf_Emu::track_info_( track_info_t* out, int ) const
+{ // The track index is ignored: all fields come from the file header
+	copy_nsf_fields( header_, out );
+	return 0; // always succeeds
+}
+
+static blargg_err_t check_nsf_header( void const* header )
+{
+	// the header must begin with the 5-byte signature "NESM" followed by 0x1A
+	bool const tag_ok = (memcmp( header, "NESM\x1A", 5 ) == 0);
+	return tag_ok ? 0 : gme_wrong_file_type;
+}
+
+// Reads only the NSF header so that track information can be queried.
+struct Nsf_File : Gme_Info_
+{
+	Nsf_Emu::header_t h;
+	Nsf_File() { set_type( gme_nsf_type ); }
+	
+	blargg_err_t load_( Data_Reader& in )
+	{
+		blargg_err_t const read_err = in.read( &h, Nsf_Emu::header_size );
+		if ( read_err ) // hitting end of file here means this is not an NSF
+			return (read_err == in.eof_error) ? gme_wrong_file_type : read_err;
+		int const supported_chips = namco_flag | vrc6_flag | fme7_flag;
+		if ( h.chip_flags & ~supported_chips )
+			set_warning( "Uses unsupported audio expansion hardware" );
+		
+		set_track_count( h.track_count );
+		return check_nsf_header( &h );
+	}
+	
+	blargg_err_t track_info_( track_info_t* out, int ) const
+	{
+		copy_nsf_fields( h, out );
+		return 0;
+	}
+};
+
+static Music_Emu* new_nsf_emu () { return BLARGG_NEW Nsf_Emu ; } // factory for the full emulator
+static Music_Emu* new_nsf_file() { return BLARGG_NEW Nsf_File; } // factory for the header-only reader
+// type record referencing both factories; gme_nsf_type is the exported pointer to it
+static gme_type_t_ const gme_nsf_type_ = { "Nintendo NES", 0, &new_nsf_emu, &new_nsf_file, "NSF", 1 };
+BLARGG_EXPORT extern gme_type_t const gme_nsf_type = &gme_nsf_type_;
+
+
+// Setup
+
+void Nsf_Emu::set_tempo_( double t )
+{ // Recomputes play_period (in units of 1/clock_divisor CPU clocks) for tempo t
+	unsigned playback_rate = get_le16( header_.ntsc_speed );
+	unsigned standard_rate = 0x411A;
+	clock_rate_ = 1789772.72727;
+	play_period = 262 * 341L * 4 - 2; // two fewer PPU clocks every four frames
+	
+	if ( pal_only ) // PAL-only files switch to the PAL clock, period and header speed field
+	{
+		play_period   = 33247 * clock_divisor;
+		clock_rate_   = 1662607.125;
+		standard_rate = 0x4E20;
+		playback_rate = get_le16( header_.pal_speed );
+	}
+	
+	if ( !playback_rate ) // a zero speed field falls back to the standard rate
+		playback_rate = standard_rate;
+	
+	if ( playback_rate != standard_rate || t != 1.0 ) // otherwise keep the default period set above
+		play_period = long (playback_rate * clock_rate_ / (1000000.0 / clock_divisor * t));
+
+	apu.set_tempo( t );
+}
+
+blargg_err_t Nsf_Emu::init_sound()
+{ // Sets voice names/types, allocates the expansion chips named in chip_flags, applies gain
+	if ( header_.chip_flags & ~(namco_flag | vrc6_flag | fme7_flag) )
+		set_warning( "Uses unsupported audio expansion hardware" );
+	
+	{
+		#define APU_NAMES "Square 1", "Square 2", "Triangle", "Noise", "DMC"
+		
+		int const count = Nes_Apu::osc_count;
+		static const char* const apu_names [count] = { APU_NAMES };
+		set_voice_count( count );
+		set_voice_names( apu_names );
+		
+	}
+	
+	static int const types [] = {
+		wave_type  | 1, wave_type  | 2, wave_type | 0,
+		noise_type | 0, mixed_type | 1,
+		wave_type  | 3, wave_type  | 4, wave_type | 5,
+		wave_type  | 6, wave_type  | 7, wave_type | 8, wave_type | 9,
+		wave_type  |10, wave_type  |11, wave_type |12, wave_type |13
+	};
+	set_voice_types( types ); // common to all sound chip configurations
+	
+	double adjusted_gain = gain(); // multiplied by 0.75 for each expansion chip in use
+	
+	#if NSF_EMU_APU_ONLY
+	{
+		if ( header_.chip_flags )
+			set_warning( "Uses unsupported audio expansion hardware" );
+	}
+	#else
+	{
+		if ( header_.chip_flags & (namco_flag | vrc6_flag | fme7_flag) )
+			set_voice_count( Nes_Apu::osc_count + 3 ); // provisional; each chip branch below sets its own count
+		
+		if ( header_.chip_flags & namco_flag )
+		{
+			namco = BLARGG_NEW Nes_Namco_Apu;
+			CHECK_ALLOC( namco );
+			adjusted_gain *= 0.75;
+			
+			int const count = Nes_Apu::osc_count + Nes_Namco_Apu::osc_count;
+			static const char* const names [count] = {
+				APU_NAMES,
+				"Wave 1", "Wave 2", "Wave 3", "Wave 4",
+				"Wave 5", "Wave 6", "Wave 7", "Wave 8"
+			};
+			set_voice_count( count );
+			set_voice_names( names );
+		}
+		
+		if ( header_.chip_flags & vrc6_flag )
+		{
+			vrc6 = BLARGG_NEW Nes_Vrc6_Apu;
+			CHECK_ALLOC( vrc6 );
+			adjusted_gain *= 0.75;
+			
+			{
+				int const count = Nes_Apu::osc_count + Nes_Vrc6_Apu::osc_count;
+				static const char* const names [count] = {
+					APU_NAMES,
+					"Saw Wave", "Square 3", "Square 4"
+				};
+				set_voice_count( count );
+				set_voice_names( names );
+			}
+			
+			if ( header_.chip_flags & namco_flag ) // VRC6 and Namco together: list both sets of extra voices
+			{
+				int const count = Nes_Apu::osc_count + Nes_Vrc6_Apu::osc_count +
+						Nes_Namco_Apu::osc_count;
+				static const char* const names [count] = {
+					APU_NAMES,
+					"Saw Wave", "Square 3", "Square 4",
+					"Wave 1", "Wave 2", "Wave 3", "Wave 4",
+					"Wave 5", "Wave 6", "Wave 7", "Wave 8"
+				};
+				set_voice_count( count );
+				set_voice_names( names );
+			}
+		}
+		
+		if ( header_.chip_flags & fme7_flag ) // runs last, so its count and names replace any set above
+		{
+			fme7 = BLARGG_NEW Nes_Fme7_Apu;
+			CHECK_ALLOC( fme7 );
+			adjusted_gain *= 0.75;
+			
+			int const count = Nes_Apu::osc_count + Nes_Fme7_Apu::osc_count;
+			static const char* const names [count] = {
+				APU_NAMES,
+				"Square 3", "Square 4", "Square 5"
+			};
+			set_voice_count( count );
+			set_voice_names( names );
+		}
+		
+		if ( namco ) namco->volume( adjusted_gain );
+		if ( vrc6  ) vrc6 ->volume( adjusted_gain );
+		if ( fme7  ) fme7 ->volume( adjusted_gain );
+	}
+	#endif
+	
+	apu.volume( adjusted_gain ); // the APU gets the same final gain as the expansion chips
+	
+	return 0;
+}
+
+blargg_err_t Nsf_Emu::load_( Data_Reader& in )
+{ // Loads header and ROM data, then configures sound chips, banks, tempo and the buffer
+	assert( offsetof (header_t,unused [4]) == header_size );
+	RETURN_ERR( rom.load( in, header_size, &header_, 0 ) );
+	
+	set_track_count( header_.track_count );
+	RETURN_ERR( check_nsf_header( &header_ ) );
+	
+	if ( header_.vers != 1 )
+		set_warning( "Unknown file version" );
+	
+	// sound and memory
+	blargg_err_t err = init_sound();
+	if ( err )
+		return err;
+	
+	// set up data
+	nes_addr_t load_addr = get_le16( header_.load_addr );
+	init_addr = get_le16( header_.init_addr );
+	play_addr = get_le16( header_.play_addr );
+	if ( !load_addr ) load_addr = rom_begin; // zero addresses default to the start of ROM
+	if ( !init_addr ) init_addr = rom_begin;
+	if ( !play_addr ) play_addr = rom_begin;
+	if ( load_addr < rom_begin || init_addr < rom_begin ) // note: play_addr is not range-checked
+	{
+		const char* w = warning(); // a pending warning, if any, is returned as the error
+		if ( !w )
+			w = "Corrupt file (invalid load/init/play address)";
+		return w;
+	}
+	
+	rom.set_addr( load_addr % bank_size ); // offset of the data within its first bank
+	int total_banks = rom.size() / bank_size;
+	
+	// bank switching
+	int first_bank = (load_addr - rom_begin) / bank_size;
+	for ( int i = 0; i < bank_count; i++ )
+	{
+		unsigned bank = i - first_bank; // negative results wrap and fail the range check below
+		if ( bank >= (unsigned) total_banks )
+			bank = 0;
+		initial_banks [i] = bank;
+		
+		if ( header_.banks [i] ) // any non-zero header entry replaces the computed defaults
+		{
+			// bank-switched
+			memcpy( initial_banks, header_.banks, sizeof initial_banks );
+			break;
+		}
+	}
+	
+	pal_only = (header_.speed_flags & 3) == 1; // bit 0 set and bit 1 clear
+	
+	#if !NSF_EMU_EXTRA_FLAGS
+		header_.speed_flags = 0;
+	#endif
+	
+	set_tempo( tempo() );
+	
+	return setup_buffer( (long) (clock_rate_ + 0.5) ); // clock rate rounded to the nearest integer
+}
+
+// Applies the treble equalization to the APU and to whichever expansion chips exist.
+void Nsf_Emu::update_eq( blip_eq_t const& eq )
+{
+	#if !NSF_EMU_APU_ONLY
+	{
+		if ( fme7 != 0 )  fme7->treble_eq( eq );
+		if ( vrc6 != 0 )  vrc6->treble_eq( eq );
+		if ( namco != 0 ) namco->treble_eq( eq );
+	}
+	#endif
+	apu.treble_eq( eq );
+}
+
+void Nsf_Emu::set_voice( int i, Blip_Buffer* buf, Blip_Buffer*, Blip_Buffer* )
+{ // Routes voice i to buf; only the first buffer argument is used
+	if ( i < Nes_Apu::osc_count )
+	{
+		apu.osc_output( i, buf );
+		return;
+	}
+	i -= Nes_Apu::osc_count; // i is now relative to the expansion voices
+	
+	#if !NSF_EMU_APU_ONLY
+	{
+		if ( fme7 && i < Nes_Fme7_Apu::osc_count ) // FME7 is tried first when present
+		{
+			fme7->osc_output( i, buf );
+			return;
+		}
+		
+		if ( vrc6 )
+		{
+			if ( i < Nes_Vrc6_Apu::osc_count )
+			{
+				// put saw first
+				if ( --i < 0 ) // voice 0 -> osc 2 (saw); voices 1 and 2 -> oscs 0 and 1
+					i = 2;
+				vrc6->osc_output( i, buf );
+				return;
+			}
+			i -= Nes_Vrc6_Apu::osc_count; // Namco voices follow the VRC6 ones
+		}
+		
+		if ( namco && i < Nes_Namco_Apu::osc_count )
+		{
+			namco->osc_output( i, buf );
+			return;
+		}
+	}
+	#endif
+}
+
+// Emulation
+
+// see nes_cpu_io.h for read/write functions
+
+void Nsf_Emu::cpu_write_misc( nes_addr_t addr, int data )
+{ // Forwards writes to the expansion sound chips; anything else is treated as unmapped
+	#if !NSF_EMU_APU_ONLY
+	{
+		if ( namco )
+		{
+			switch ( addr )
+			{
+			case Nes_Namco_Apu::data_reg_addr:
+				namco->write_data( time(), data );
+				return;
+			
+			case Nes_Namco_Apu::addr_reg_addr:
+				namco->write_addr( data );
+				return;
+			}
+		}
+		
+		if ( addr >= Nes_Fme7_Apu::latch_addr && fme7 )
+		{
+			switch ( addr & Nes_Fme7_Apu::addr_mask ) // FME7 registers are decoded from the masked address
+			{
+			case Nes_Fme7_Apu::latch_addr:
+				fme7->write_latch( data );
+				return;
+			
+			case Nes_Fme7_Apu::data_addr:
+				fme7->write_data( time(), data );
+				return;
+			}
+		}
+		
+		if ( vrc6 )
+		{
+			unsigned reg = addr & (Nes_Vrc6_Apu::addr_step - 1); // offset within the oscillator's block
+			unsigned osc = unsigned (addr - Nes_Vrc6_Apu::base_addr) / Nes_Vrc6_Apu::addr_step; // addresses below base_addr wrap to a large value
+			if ( osc < Nes_Vrc6_Apu::osc_count && reg < Nes_Vrc6_Apu::reg_count )
+			{
+				vrc6->write_osc( time(), osc, reg, data );
+				return;
+			}
+		}
+	}
+	#endif
+	
+	// unmapped write
+	
+	#ifndef NDEBUG
+	{
+		// some games write to $8000 and $8001 repeatedly
+		if ( addr == 0x8000 || addr == 0x8001 ) return;
+		
+		// probably namco sound mistakenly turned on in mck
+		if ( addr == 0x4800 || addr == 0xF800 ) return;
+		
+		// memory mapper?
+		if ( addr == 0xFFF8 ) return;
+		
+		debug_printf( "write_unmapped( 0x%04X, 0x%02X )\n", (unsigned) addr, (unsigned) data );
+	}
+	#endif
+}
+
+blargg_err_t Nsf_Emu::start_track_( int track )
+{ // Resets memory, CPU, banks and sound chips, then points the CPU at the init routine
+	RETURN_ERR( Classic_Emu::start_track_( track ) );
+	
+	memset( low_mem, 0, sizeof low_mem );
+	memset( sram,    0, sizeof sram );
+	
+	cpu::reset( unmapped_code ); // also maps low_mem
+	cpu::map_code( sram_addr, sizeof sram, sram );
+	for ( int i = 0; i < bank_count; ++i ) // write the initial bank numbers to the bank-select addresses
+		cpu_write( bank_select_addr + i, initial_banks [i] );
+	
+	apu.reset( pal_only, (header_.speed_flags & 0x20) ? 0x3F : 0 );
+	apu.write_register( 0, 0x4015, 0x0F );
+	apu.write_register( 0, 0x4017, (header_.speed_flags & 0x10) ? 0x80 : 0 );
+	#if !NSF_EMU_APU_ONLY
+	{
+		if ( namco ) namco->reset();
+		if ( vrc6  ) vrc6 ->reset();
+		if ( fme7  ) fme7 ->reset();
+	}
+	#endif
+	
+	play_ready = 4; // countdown decremented at each play time in run_clocks()
+	play_extra = 0;
+	next_play = play_period / clock_divisor;
+	
+	saved_state.pc = badop_addr; // badop_addr here means "no saved registers to resume"
+	low_mem [0x1FF] = (badop_addr - 1) >> 8; // NOTE(review): presumably so a return from init lands on badop_addr
+	low_mem [0x1FE] = (badop_addr - 1) & 0xFF;
+	r.sp = 0xFD;
+	r.pc = init_addr;
+	r.a  = track; // init receives the track number in A and the PAL flag in X
+	r.x  = pal_only;
+	
+	return 0;
+}
+
+blargg_err_t Nsf_Emu::run_clocks( blip_time_t& duration, int )
+{ // Runs the CPU for up to duration clocks, starting the play routine at each play time
+	set_time( 0 );
+	while ( time() < duration )
+	{
+		nes_time_t end = min( (blip_time_t) next_play, duration );
+		end = min( end, time() + 32767 ); // allows CPU to use 16-bit time delta
+		if ( cpu::run( end ) )
+		{
+			if ( r.pc != badop_addr ) // stopped somewhere other than the return trap
+			{
+				set_warning( "Emulation error (illegal instruction)" );
+				r.pc++;
+			}
+			else
+			{
+				play_ready = 1; // routine returned; play may start at the next play time
+				if ( saved_state.pc != badop_addr ) // resume the code the play call interrupted
+				{
+					cpu::r = saved_state;
+					saved_state.pc = badop_addr;
+				}
+				else
+				{
+					set_time( end ); // nothing left to run: idle until end
+				}
+			}
+		}
+		
+		if ( time() >= next_play )
+		{
+			nes_time_t period = (play_period + play_extra) / clock_divisor;
+			play_extra = play_period - period * clock_divisor; // fed into the next period's division
+			next_play += period;
+			if ( play_ready && !--play_ready ) // start play only when the countdown reaches zero
+			{
+				check( saved_state.pc == badop_addr );
+				if ( r.pc != badop_addr ) // a routine is still running: save its registers for later
+					saved_state = cpu::r;
+				
+				r.pc = play_addr;
+				low_mem [0x100 + r.sp--] = (badop_addr - 1) >> 8; // push badop_addr - 1, high byte first
+				low_mem [0x100 + r.sp--] = (badop_addr - 1) & 0xFF;
+				GME_FRAME_HOOK( this );
+			}
+		}
+	}
+	
+	if ( cpu::error_count() )
+	{
+		cpu::clear_error_count();
+		set_warning( "Emulation error (illegal instruction)" );
+	}
+	
+	duration = time(); // report the time actually emulated
+	next_play -= duration; // rebase for the next call, which starts again at time 0
+	check( next_play >= 0 );
+	if ( next_play < 0 )
+		next_play = 0;
+	
+	apu.end_frame( duration );
+	
+	#if !NSF_EMU_APU_ONLY
+	{
+		if ( namco ) namco->end_frame( duration );
+		if ( vrc6  ) vrc6 ->end_frame( duration );
+		if ( fme7  ) fme7 ->end_frame( duration );
+	}
+	#endif
+	
+	return 0;
+}
diff --git a/libraries/game-music-emu/gme/Nsf_Emu.h b/libraries/game-music-emu/gme/Nsf_Emu.h
new file mode 100644
index 000000000..e538b1b30
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nsf_Emu.h
@@ -0,0 +1,106 @@
+// Nintendo NES/Famicom NSF music file emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef NSF_EMU_H
+#define NSF_EMU_H
+
+#include "Classic_Emu.h"
+#include "Nes_Apu.h"
+#include "Nes_Cpu.h"
+
+class Nsf_Emu : private Nes_Cpu, public Classic_Emu {
+	typedef Nes_Cpu cpu;
+public:
+	// Equalizer profiles for US NES and Japanese Famicom
+	static equalizer_t const nes_eq;
+	static equalizer_t const famicom_eq;
+	
+	// NSF file header
+	enum { header_size = 0x80 };
+	struct header_t
+	{
+		char tag [5]; // must be "NESM\x1A"
+		byte vers; // values other than 1 produce an "Unknown file version" warning
+		byte track_count;
+		byte first_track;
+		byte load_addr [2]; // addresses and speeds are little-endian; a zero address defaults to rom_begin
+		byte init_addr [2];
+		byte play_addr [2];
+		char game [32];
+		char author [32];
+		char copyright [32];
+		byte ntsc_speed [2];
+		byte banks [8]; // initial bank numbers; any non-zero entry makes the file bank-switched
+		byte pal_speed [2];
+		byte speed_flags; // (value & 3) == 1 selects PAL-only timing
+		byte chip_flags; // supported bits: 0x01 VRC6, 0x10 Namco, 0x20 FME7
+		byte unused [4];
+	};
+	
+	// Header for currently loaded file
+	header_t const& header() const { return header_; }
+	
+	static gme_type_t static_type() { return gme_nsf_type; }
+	
+public:
+	// deprecated
+	using Music_Emu::load;
+	blargg_err_t load( header_t const& h, Data_Reader& in ) // use Remaining_Reader
+			{ return load_remaining_( &h, sizeof h, in ); }
+
+public:
+	Nsf_Emu();
+	~Nsf_Emu();
+	Nes_Apu* apu_() { return &apu; }
+protected:
+	blargg_err_t track_info_( track_info_t*, int track ) const;
+	blargg_err_t load_( Data_Reader& );
+	blargg_err_t start_track_( int );
+	blargg_err_t run_clocks( blip_time_t&, int );
+	void set_tempo_( double );
+	void set_voice( int, Blip_Buffer*, Blip_Buffer*, Blip_Buffer* );
+	void update_eq( blip_eq_t const& );
+	void unload();
+protected:
+	enum { bank_count = 8 };
+	byte initial_banks [bank_count]; // written to the bank-select addresses by start_track_()
+	nes_addr_t init_addr;
+	nes_addr_t play_addr;
+	double clock_rate_;
+	bool pal_only;
+	
+	// timing
+	Nes_Cpu::registers_t saved_state; // registers interrupted by a play call; pc == badop_addr when empty
+	nes_time_t next_play; // time of the next play call
+	nes_time_t play_period; // in units of 1/clock_divisor clocks
+	int play_extra; // remainder term fed into the next period in run_clocks()
+	int play_ready; // countdown; play starts when it reaches zero
+	
+	enum { rom_begin = 0x8000 };
+	enum { bank_select_addr = 0x5FF8 };
+	enum { bank_size = 0x1000 };
+	Rom_Data<bank_size> rom;
+	
+public: private: friend class Nes_Cpu;
+	void cpu_jsr( nes_addr_t );
+	int cpu_read( nes_addr_t );
+	void cpu_write( nes_addr_t, int );
+	void cpu_write_misc( nes_addr_t, int );
+	enum { badop_addr = bank_select_addr }; // address run_clocks() compares r.pc against to detect a return
+	
+private:
+	class Nes_Namco_Apu* namco; // expansion chips are null unless init_sound() allocates them
+	class Nes_Vrc6_Apu*  vrc6;
+	class Nes_Fme7_Apu*  fme7;
+	Nes_Apu apu;
+	static int pcm_read( void*, nes_addr_t );
+	blargg_err_t init_sound();
+	
+	header_t header_;
+	
+	enum { sram_addr = 0x6000 };
+	byte sram [0x2000];
+	byte unmapped_code [Nes_Cpu::page_size + 8]; // filled with Nes_Cpu::bad_opcode by the constructor
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Nsfe_Emu.cpp b/libraries/game-music-emu/gme/Nsfe_Emu.cpp
new file mode 100644
index 000000000..035f99dee
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nsfe_Emu.cpp
@@ -0,0 +1,335 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Nsfe_Emu.h"
+
+#include "blargg_endian.h"
+#include <string.h>
+#include <ctype.h>
+
+/* Copyright (C) 2005-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+Nsfe_Info::Nsfe_Info() { playlist_disabled = false; actual_track_count_ = 0; } // disable_playlist() reads the count even if no load ever succeeded
+
+Nsfe_Info::~Nsfe_Info() { }
+
+inline void Nsfe_Info::unload() // discard every per-file table filled in by load()
+{
+	track_times.clear();
+	playlist.clear();
+	track_names.clear();     // holds pointers into track_name_data
+	track_name_data.clear();
+}
+
+// TODO: if no playlist, treat as if there is a playlist that is just 1,2,3,4,5... ?
+void Nsfe_Info::disable_playlist( bool b )
+{
+	playlist_disabled = b;
+	info.track_count = playlist.size(); // playlist length as stored in the byte-sized header field
+	if ( playlist_disabled || !info.track_count )
+		info.track_count = actual_track_count_; // no usable playlist: report the file's own count
+}
+
+int Nsfe_Info::remap_track( int track ) const
+{
+	if ( playlist_disabled || (unsigned) track >= playlist.size() )
+		return track; // playlist off, or position outside it (negatives included): leave unchanged
+	return playlist [track];
+}
+
+// Read size bytes of NUL-separated strings into chars and point strs at the start of each one
+static blargg_err_t read_strs( Data_Reader& in, long size, blargg_vector<char>& chars,
+		blargg_vector<const char*>& strs )
+{
+	RETURN_ERR( chars.resize( size + 1 ) );
+	chars [size] = 0; // in case last string doesn't have terminator
+	RETURN_ERR( in.read( &chars [0], size ) );
+	
+	RETURN_ERR( strs.resize( 128 ) ); // starting capacity; doubled below whenever it fills up
+	int count = 0;
+	for ( int i = 0; i < size; i++ ) // this i++ steps over the terminator found by the inner loop
+	{
+		if ( (int) strs.size() <= count )
+			RETURN_ERR( strs.resize( count * 2 ) );
+		strs [count++] = &chars [i];
+		while ( i < size && chars [i] ) // advance to the current string's terminator
+			i++;
+	}
+	
+	return strs.resize( count ); // trim to the number of strings actually found
+}
+
+// Copy in to out (out_max characters allocated), keeping at most out_max - 1 characters.
+static void copy_str( const char* in, char* out, int out_max )
+{
+	int const last = out_max - 1; // final slot, always reserved for the terminator
+	strncpy( out, in, last );
+	out [last] = 0;
+}
+
+struct nsfe_info_t // byte layout that Nsfe_Info::load reads the info chunk into (16 bytes)
+{
+	byte load_addr [2]; // the three addresses are copied as one 6-byte run into the NSF header
+	byte init_addr [2];
+	byte play_addr [2];
+	byte speed_flags;
+	byte chip_flags;
+	byte track_count; // load() presets this to 1 in case the chunk is too short to hold it
+	byte first_track; // load() presets this to 0 in case the chunk is too short to hold it
+	byte unused [6];
+};
+
+// Parse an NSFE stream: verify the "NSFE" signature, then read chunks until the end chunk.
+// Chunk contents fill info and the per-track tables; song data goes to nsf_emu when non-null.
+blargg_err_t Nsfe_Info::load( Data_Reader& in, Nsf_Emu* nsf_emu )
+{
+	int const nsfe_info_size = 16;
+	assert( offsetof (nsfe_info_t,unused [6]) == nsfe_info_size );
+	
+	// check header
+	byte signature [4];
+	blargg_err_t err = in.read( signature, sizeof signature );
+	if ( err )
+		return (err == in.eof_error ? gme_wrong_file_type : err);
+	if ( memcmp( signature, "NSFE", 4 ) )
+		return gme_wrong_file_type;
+	
+	// free previous info
+	unload();
+	
+	// default nsf header
+	static const Nsf_Emu::header_t base_header =
+	{
+		{'N','E','S','M','\x1A'},// tag
+		1,                  // version
+		1, 1,               // track count, first track
+		{0,0},{0,0},{0,0},  // addresses
+		"","","",           // strings
+		{0x1A, 0x41},       // NTSC rate
+		{0,0,0,0,0,0,0,0},  // banks
+		{0x20, 0x4E},       // PAL rate
+		0, 0,               // flags
+		{0,0,0,0}           // unused
+	};
+	Nsf_Emu::header_t& header = info;
+	header = base_header;
+	actual_track_count_ = base_header.track_count; // stays at this default if no info chunk follows
+	
+	// parse tags
+	int phase = 0; // 0: nothing seen, 1: info chunk read, 2: data chunk read, 3: end chunk read
+	while ( phase != 3 )
+	{
+		// read size and tag
+		byte block_header [2] [4];
+		RETURN_ERR( in.read( block_header, sizeof block_header ) );
+		blargg_long size = get_le32( block_header [0] );
+		blargg_long tag  = get_le32( block_header [1] );
+
+		if ( size < 0 )
+			return "Corrupt file";
+		
+		switch ( tag )
+		{
+			case BLARGG_4CHAR('O','F','N','I'): {
+				check( phase == 0 );
+				if ( size < 8 )
+					return "Corrupt file";
+				
+				nsfe_info_t finfo;
+				finfo.track_count = 1; // defaults for chunks too short to carry these two fields
+				finfo.first_track = 0;
+				
+				RETURN_ERR( in.read( &finfo, min( size, (blargg_long) nsfe_info_size ) ) );
+				if ( size > nsfe_info_size )
+					RETURN_ERR( in.skip( size - nsfe_info_size ) );
+				phase = 1;
+				info.speed_flags = finfo.speed_flags;
+				info.chip_flags  = finfo.chip_flags;
+				info.track_count = finfo.track_count;
+				this->actual_track_count_ = finfo.track_count;
+				info.first_track = finfo.first_track;
+				memcpy( info.load_addr, finfo.load_addr, 2 * 3 );
+				break;
+			}
+			
+			case BLARGG_4CHAR('K','N','A','B'):
+				if ( size > (int) sizeof info.banks )
+					return "Corrupt file";
+				RETURN_ERR( in.read( info.banks, size ) );
+				break;
+			
+			case BLARGG_4CHAR('h','t','u','a'): {
+				blargg_vector<char> chars;
+				blargg_vector<const char*> strs;
+				RETURN_ERR( read_strs( in, size, chars, strs ) );
+				int n = strs.size();
+				
+				if ( n > 3 )
+					copy_str( strs [3], info.dumper, sizeof info.dumper );
+				
+				if ( n > 2 )
+					copy_str( strs [2], info.copyright, sizeof info.copyright );
+				
+				if ( n > 1 )
+					copy_str( strs [1], info.author, sizeof info.author );
+				
+				if ( n > 0 )
+					copy_str( strs [0], info.game, sizeof info.game );
+				
+				break;
+			}
+			
+			case BLARGG_4CHAR('e','m','i','t'):
+				RETURN_ERR( track_times.resize( size / 4 ) );
+				RETURN_ERR( in.read( track_times.begin(), track_times.size() * 4 ) );
+				if ( size % 4 ) // consume a partial trailing entry so the next chunk header is read in place
+					RETURN_ERR( in.skip( size % 4 ) );
+				break;
+			
+			case BLARGG_4CHAR('l','b','l','t'):
+				RETURN_ERR( read_strs( in, size, track_name_data, track_names ) );
+				break;
+			
+			case BLARGG_4CHAR('t','s','l','p'):
+				RETURN_ERR( playlist.resize( size ) );
+				RETURN_ERR( in.read( &playlist [0], size ) );
+				break;
+			
+			case BLARGG_4CHAR('A','T','A','D'): {
+				check( phase == 1 );
+				phase = 2;
+				if ( !nsf_emu )
+				{
+					RETURN_ERR( in.skip( size ) );
+				}
+				else
+				{
+					Subset_Reader sub( &in, size ); // limit emu to nsf data
+					Remaining_Reader rem( &header, Nsf_Emu::header_size, &sub );
+					RETURN_ERR( nsf_emu->load( rem ) );
+					check( rem.remain() == 0 );
+				}
+				break;
+			}
+			
+			case BLARGG_4CHAR('D','N','E','N'):
+				check( phase == 2 );
+				phase = 3;
+				break;
+			
+			default:
+				// tags that can be skipped start with a lowercase character
+				check( islower( (tag >> 24) & 0xFF ) );
+				RETURN_ERR( in.skip( size ) );
+				break;
+		}
+	}
+	
+	return 0;
+}
+
+blargg_err_t Nsfe_Info::track_info_( track_info_t* out, int track ) const
+{
+	int const index = remap_track( track ); // playlist position -> index into the per-track tables
+	if ( (unsigned) index < track_names.size() )
+		Gme_File::copy_field_( out->song, track_names [index] );
+	if ( (unsigned) index < track_times.size() )
+	{
+		long const stored = (int32_t) get_le32( track_times [index] );
+		if ( stored > 0 ) // zero or negative entries leave out->length untouched
+			out->length = stored;
+	}
+	
+	GME_COPY_FIELD( info, out, dumper );
+	GME_COPY_FIELD( info, out, copyright );
+	GME_COPY_FIELD( info, out, author );
+	GME_COPY_FIELD( info, out, game );
+	return 0;
+}
+
+Nsfe_Emu::Nsfe_Emu()
+{
+	loading = false; // true only while load_() is running Nsfe_Info::load on this emulator
+	set_type( gme_nsfe_type );
+}
+
+Nsfe_Emu::~Nsfe_Emu() { }
+
+void Nsfe_Emu::unload()
+{
+	if ( !loading ) // during load_() the NSFE tables are kept (presumably the nested load calls unload() - confirm)
+		info.unload(); // TODO: extremely hacky!
+	Nsf_Emu::unload();
+}
+
+blargg_err_t Nsfe_Emu::track_info_( track_info_t* out, int track ) const
+{
+	return info.track_info_( out, track ); // answered entirely from the parsed NSFE chunks
+}
+
+struct Nsfe_File : Gme_Info_ // reads NSFE metadata through Nsfe_Info without creating an emulator
+{
+	Nsfe_Info info;
+	
+	Nsfe_File() { set_type( gme_nsfe_type ); }
+	
+	blargg_err_t load_( Data_Reader& in )
+	{
+		RETURN_ERR( info.load( in, 0 ) ); // null emulator: the data chunk is skipped, not loaded
+		info.disable_playlist( false ); // recompute track_count with the playlist enabled
+		set_track_count( info.info.track_count );
+		return 0;
+	}
+	
+	blargg_err_t track_info_( track_info_t* out, int track ) const
+	{
+		return info.track_info_( out, track );
+	}
+};
+
+static Music_Emu* new_nsfe_emu () { return BLARGG_NEW Nsfe_Emu ; } // factory for the full emulator
+static Music_Emu* new_nsfe_file() { return BLARGG_NEW Nsfe_File; } // factory for the info-only reader
+
+static gme_type_t_ const gme_nsfe_type_ = { "Nintendo NES", 0, &new_nsfe_emu, &new_nsfe_file, "NSFE", 1 };
+BLARGG_EXPORT extern gme_type_t const gme_nsfe_type = &gme_nsfe_type_;
+
+
+blargg_err_t Nsfe_Emu::load_( Data_Reader& in )
+{
+	if ( loading ) // re-entered while info.load() below is running: load the data as plain NSF
+		return Nsf_Emu::load_( in );
+	
+	// TODO: this hacky recursion-avoidance could have subtle problems
+	loading = true;
+	blargg_err_t err = info.load( in, this );
+	loading = false;
+	disable_playlist( false ); // runs even when err is set; refreshes the reported track count
+	return err;
+}
+
+void Nsfe_Emu::disable_playlist( bool b )
+{
+	info.disable_playlist( b );
+	set_track_count( info.info.track_count ); // publish the count Nsfe_Info just recomputed
+}
+
+void Nsfe_Emu::clear_playlist_()
+{
+	disable_playlist(); // argument defaults to true, so the NSFE-internal playlist is turned off too
+	Nsf_Emu::clear_playlist_();
+}
+
+blargg_err_t Nsfe_Emu::start_track_( int track )
+{
+	return Nsf_Emu::start_track_( info.remap_track( track ) ); // playlist position -> underlying track
+}
diff --git a/libraries/game-music-emu/gme/Nsfe_Emu.h b/libraries/game-music-emu/gme/Nsfe_Emu.h
new file mode 100644
index 000000000..fd65f0af8
--- /dev/null
+++ b/libraries/game-music-emu/gme/Nsfe_Emu.h
@@ -0,0 +1,68 @@
+// Nintendo NES/Famicom NSFE music file emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef NSFE_EMU_H
+#define NSFE_EMU_H
+
+#include "blargg_common.h"
+#include "Nsf_Emu.h"
+
+// Allows reading info from NSFE file without creating emulator
+class Nsfe_Info {
+public:
+	blargg_err_t load( Data_Reader&, Nsf_Emu* ); // emulator may be null to parse metadata only
+	
+	struct info_t : Nsf_Emu::header_t
+	{
+		char game      [256];
+		char author    [256];
+		char copyright [256];
+		char dumper    [256];
+	} info;
+	
+	void disable_playlist( bool = true ); // also recomputes info.track_count
+	
+	blargg_err_t track_info_( track_info_t* out, int track ) const;
+	
+	int remap_track( int i ) const; // playlist position -> track number; unchanged when no playlist applies
+	
+	void unload(); // clears the name, playlist and time tables
+	
+	Nsfe_Info();
+	~Nsfe_Info();
+private:
+	blargg_vector<char> track_name_data; // raw bytes of the track-name chunk
+	blargg_vector<const char*> track_names; // one pointer per name, into track_name_data
+	blargg_vector<unsigned char> playlist;
+	blargg_vector<char [4]> track_times; // 4-byte little-endian entries, decoded in track_info_()
+	int actual_track_count_; // track count from the info chunk, independent of the playlist
+	bool playlist_disabled;
+};
+
+class Nsfe_Emu : public Nsf_Emu {
+public:
+	static gme_type_t static_type() { return gme_nsfe_type; }
+	
+public:
+	// deprecated
+	struct header_t { char tag [4]; };
+	using Music_Emu::load;
+	blargg_err_t load( header_t const& h, Data_Reader& in ) // use Remaining_Reader
+			{ return load_remaining_( &h, sizeof h, in ); }
+	void disable_playlist( bool = true ); // use clear_playlist()
+
+public:
+	Nsfe_Emu();
+	~Nsfe_Emu();
+protected:
+	blargg_err_t load_( Data_Reader& );
+	blargg_err_t track_info_( track_info_t*, int track ) const;
+	blargg_err_t start_track_( int ); // remaps the track through the NSFE playlist first
+	void unload();
+	void clear_playlist_();
+private:
+	Nsfe_Info info; // parsed NSFE chunks: header fields, names, times, playlist
+	bool loading; // set while load_() is parsing, so nested load_()/unload() calls can tell
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Sap_Apu.cpp b/libraries/game-music-emu/gme/Sap_Apu.cpp
new file mode 100644
index 000000000..26fa2d13f
--- /dev/null
+++ b/libraries/game-music-emu/gme/Sap_Apu.cpp
@@ -0,0 +1,334 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Sap_Apu.h"
+
+#include <string.h>
+
+/* Copyright (C) 2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+int const max_frequency = 12000; // pure waves above this frequency are silenced
+
+static void gen_poly( blargg_ulong mask, int count, byte* out ) // fill count bytes of out with shift-register output bits
+{
+	blargg_ulong n = 1;
+	do
+	{
+		int bits = 0;
+		int b = 0;
+		do
+		{
+			// implemented using "Galois configuration"
+			bits |= (n & 1) << b;
+			n = (n >> 1) ^ (mask & -(n & 1));
+		}
+		while ( b++ < 7 ); // eight bits per output byte, least significant bit first
+		*out++ = bits;
+	}
+	while ( --count );
+}
+
+// poly5
+int const poly5_len = (1 <<  5) - 1;
+blargg_ulong const poly5_mask = (1UL << poly5_len) - 1;
+blargg_ulong const poly5 = 0x167C6EA1;
+
+inline blargg_ulong run_poly5( blargg_ulong in, int shift ) // rotate the poly5_len-bit pattern left by shift bits
+{
+	return (in << shift & poly5_mask) | (in >> (poly5_len - shift));
+}
+
+#define POLY_MASK( width, tap1, tap2 ) /* feedback mask for gen_poly(); arguments may be expressions */ \
+	((1UL << ((width) - 1 - (tap1))) | (1UL << ((width) - 1 - (tap2))))
+
+Sap_Apu_Impl::Sap_Apu_Impl()
+{
+	gen_poly( POLY_MASK(  4, 1, 0 ), sizeof poly4,  poly4  );
+	gen_poly( POLY_MASK(  9, 5, 0 ), sizeof poly9,  poly9  );
+	gen_poly( POLY_MASK( 17, 5, 0 ), sizeof poly17, poly17 );
+	
+	if ( 0 ) // comment out to recalculate poly5 constant
+	{
+		byte poly5 [4]; // shadows the file-scope poly5 constant inside this block
+		gen_poly( POLY_MASK(  5, 2, 0 ), sizeof poly5,  poly5  );
+		blargg_ulong n = poly5 [3] * 0x1000000L + poly5 [2] * 0x10000L + 
+				poly5 [1] * 0x100L + poly5 [0];
+		blargg_ulong rev = n & 1;
+		for ( int i = 1; i < poly5_len; i++ ) // reverse the order of bits 1..poly5_len-1, bit 0 stays
+			rev |= (n >> i & 1) << (poly5_len - i);
+		debug_printf( "poly5: 0x%08lX\n", rev );
+	}
+}
+
+Sap_Apu::Sap_Apu()
+{
+	impl = 0; // no shared tables until reset() supplies them
+	for ( int i = 0; i < osc_count; i++ )
+		osc_output( i, 0 ); // every oscillator starts without an output buffer
+}
+
+void Sap_Apu::reset( Sap_Apu_Impl* new_impl )
+{
+	impl      = new_impl;
+	last_time = 0;
+	poly5_pos = 0;
+	poly4_pos = 0;
+	polym_pos = 0;
+	control   = 0;
+	
+	for ( int i = 0; i < osc_count; i++ )
+		memset( &oscs [i], 0, offsetof (osc_t,output) ); // zero the fields before output; the buffer pointer is kept
+}
+
+inline void Sap_Apu::calc_periods() // recompute each oscillator's period from its reload register and the control bits
+{
+	 // 15/64 kHz clock
+	int divider = 28;
+	if ( this->control & 1 )
+		divider = 114;
+	
+	for ( int i = 0; i < osc_count; i++ )
+	{
+		osc_t* const osc = &oscs [i];
+		
+		int const osc_reload = osc->regs [0]; // cache
+		blargg_long period = (osc_reload + 1) * divider;
+		static byte const fast_bits [osc_count] = { 1 << 6, 1 << 4, 1 << 5, 1 << 3 };
+		if ( this->control & fast_bits [i] )
+		{
+			period = osc_reload + 4;
+			if ( i & 1 ) // odd oscillator: its reload is the high byte, the previous oscillator's the low byte
+			{
+				period = osc_reload * 0x100L + osc [-1].regs [0] + 7;
+				if ( !(this->control & fast_bits [i - 1]) )
+					period = (period - 6) * divider;
+				
+				if ( (osc [-1].regs [1] & 0x1F) > 0x10 )
+					debug_printf( "Use of slave channel in 16-bit mode not supported\n" );
+			}
+		}
+		osc->period = period;
+	}
+}
+
+void Sap_Apu::run_until( blip_time_t end_time ) // advance all oscillators from last_time to end_time, emitting amplitude changes
+{
+	calc_periods();
+	Sap_Apu_Impl* const impl = this->impl; // cache
+	
+	// 17/9-bit poly selection
+	byte const* polym = impl->poly17;
+	int polym_len = poly17_len;
+	if ( this->control & 0x80 )
+	{
+		polym_len = poly9_len;
+		polym = impl->poly9;
+	}
+	polym_pos %= polym_len;
+	
+	for ( int i = 0; i < osc_count; i++ )
+	{
+		osc_t* const osc = &oscs [i];
+		blip_time_t time = last_time + osc->delay;
+		blip_time_t const period = osc->period;
+		
+		// output
+		Blip_Buffer* output = osc->output;
+		if ( output )
+		{
+			output->set_modified();
+			
+			int const osc_control = osc->regs [1]; // cache
+			int volume = (osc_control & 0x0F) * 2;
+			if ( !volume || osc_control & 0x10 || // silent, DAC mode, or inaudible frequency
+					((osc_control & 0xA0) == 0xA0 && period < 1789773 / 2 / max_frequency) )
+			{
+				if ( !(osc_control & 0x10) )
+					volume >>= 1; // inaudible frequency = half volume
+				
+				int delta = volume - osc->last_amp;
+				if ( delta )
+				{
+					osc->last_amp = volume;
+					impl->synth.offset( last_time, delta, output );
+				}
+				
+				// TODO: doesn't maintain high pass flip-flop (very minor issue)
+			}
+			else
+			{
+				// high pass
+				static byte const hipass_bits [osc_count] = { 1 << 2, 1 << 1, 0, 0 };
+				blip_time_t period2 = 0; // unused if no high pass
+				blip_time_t time2 = end_time;
+				if ( this->control & hipass_bits [i] )
+				{
+					period2 = osc [2].period; // the high pass is clocked by the oscillator two slots later
+					time2 = last_time + osc [2].delay;
+					if ( osc->invert )
+					{
+						// trick inner wave loop into inverting output
+						osc->last_amp -= volume;
+						volume = -volume;
+					}
+				}
+				
+				if ( time < end_time || time2 < end_time )
+				{
+					// poly source
+					static byte const poly1 [] = { 0x55, 0x55 }; // square wave
+					byte const* poly = poly1;
+					int poly_len = 8 * sizeof poly1; // can be just 2 bits, but this is faster
+					int poly_pos = osc->phase & 1;
+					int poly_inc = 1;
+					if ( !(osc_control & 0x20) )
+					{
+						poly     = polym;
+						poly_len = polym_len;
+						poly_pos = polym_pos;
+						if ( osc_control & 0x40 )
+						{
+							poly     = impl->poly4;
+							poly_len = poly4_len;
+							poly_pos = poly4_pos;
+						}
+						poly_inc = period % poly_len;
+						poly_pos = (poly_pos + osc->delay) % poly_len;
+					}
+					poly_inc -= poly_len; // allows more optimized inner loop below
+					
+					// square/poly5 wave
+					blargg_ulong wave = poly5;
+					check( poly5 & 1 ); // low bit is set for pure wave
+					int poly5_inc = 0;
+					if ( !(osc_control & 0x80) )
+					{
+						wave = run_poly5( wave, (osc->delay + poly5_pos) % poly5_len );
+						poly5_inc = period % poly5_len;
+					}
+					
+					// Run wave and high pass interleaved with each catching up to the other.
+					// Disabled high pass has no performance effect since inner wave loop
+					// makes no compromise for high pass, and only runs once in that case.
+					int osc_last_amp = osc->last_amp;
+					do
+					{
+						// run high pass
+						if ( time2 < time )
+						{
+							int delta = -osc_last_amp;
+							if ( volume < 0 )
+								delta += volume;
+							if ( delta )
+							{
+								osc_last_amp += delta - volume;
+								volume = -volume;
+								impl->synth.offset( time2, delta, output );
+							}
+						}
+						while ( time2 <= time ) // must advance *past* time to avoid hang
+							time2 += period2;
+						
+						// run wave
+						blip_time_t end = end_time;
+						if ( end > time2 )
+							end = time2;
+						while ( time < end )
+						{
+							if ( wave & 1 ) // the poly5 bit gates whether this tick samples the poly source
+							{
+								int amp = volume & -(poly [poly_pos >> 3] >> (poly_pos & 7) & 1);
+								if ( (poly_pos += poly_inc) < 0 )
+									poly_pos += poly_len;
+								int delta = amp - osc_last_amp;
+								if ( delta )
+								{
+									osc_last_amp = amp;
+									impl->synth.offset( time, delta, output );
+								}
+							}
+							wave = run_poly5( wave, poly5_inc );
+							time += period;
+						}
+					}
+					while ( time < end_time || time2 < end_time );
+					
+					osc->phase = poly_pos;
+					osc->last_amp = osc_last_amp;
+				}
+				
+				osc->invert = 0;
+				if ( volume < 0 )
+				{
+					// undo inversion trickery
+					osc->last_amp -= volume;
+					osc->invert = 1;
+				}
+			}
+		}
+		
+		// maintain divider
+		blip_time_t remain = end_time - time;
+		if ( remain > 0 )
+		{
+			blargg_long count = (remain + period - 1) / period; // whole periods needed to reach or pass end_time
+			osc->phase ^= count;
+			time += count * period;
+		}
+		osc->delay = time - end_time; // clocks from end_time until this oscillator's next tick
+	}
+	
+	// advance polies
+	blip_time_t duration = end_time - last_time;
+	last_time = end_time;
+	poly4_pos = (poly4_pos + duration) % poly4_len;
+	poly5_pos = (poly5_pos + duration) % poly5_len;
+	polym_pos += duration; // will get %'d on next call
+}
+
+void Sap_Apu::write_data( blip_time_t time, unsigned addr, int data )
+{
+	run_until( time ); // bring output up to date before the register changes
+	int i = (addr ^ 0xD200) >> 1; // 0-3 for the register pairs at 0xD200-0xD207
+	if ( i < osc_count )
+	{
+		oscs [i].regs [addr & 1] = data;
+	}
+	else if ( addr == 0xD208 )
+	{
+		control = data;
+	}
+	else if ( addr == 0xD209 ) // any write here clears the delay of all four oscillators
+	{
+		oscs [0].delay = 0;
+		oscs [1].delay = 0;
+		oscs [2].delay = 0;
+		oscs [3].delay = 0;
+	}
+	/*
+	// TODO: are polynomials reset in this case?
+	else if ( addr == 0xD20F )
+	{
+		if ( (data & 3) == 0 )
+			polym_pos = 0;
+	}
+	*/
+}
+
+void Sap_Apu::end_frame( blip_time_t end_time )
+{
+	if ( end_time > last_time )
+		run_until( end_time );
+	
+	last_time -= end_time; // rebase so times in the next frame are measured from this frame's end
+}
diff --git a/libraries/game-music-emu/gme/Sap_Apu.h b/libraries/game-music-emu/gme/Sap_Apu.h
new file mode 100644
index 000000000..1b67571bc
--- /dev/null
+++ b/libraries/game-music-emu/gme/Sap_Apu.h
@@ -0,0 +1,77 @@
+// Atari POKEY sound chip emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef SAP_APU_H
+#define SAP_APU_H
+
+#include "blargg_common.h"
+#include "Blip_Buffer.h"
+
+class Sap_Apu_Impl;
+
+class Sap_Apu {
+public:
+	enum { osc_count = 4 };
+	void osc_output( int index, Blip_Buffer* ); // set (or clear with null) the buffer one oscillator writes to
+	
+	void reset( Sap_Apu_Impl* ); // clears state and attaches the shared tables/synth
+	
+	enum { start_addr = 0xD200 };
+	enum { end_addr   = 0xD209 };
+	void write_data( blip_time_t, unsigned addr, int data ); // runs emulation up to the given time, then stores data
+	
+	void end_frame( blip_time_t ); // runs to the given time and rebases last_time relative to it
+	
+public:
+	Sap_Apu();
+private:
+	struct osc_t
+	{
+		unsigned char regs [2];
+		unsigned char phase;
+		unsigned char invert;
+		int last_amp;
+		blip_time_t delay;
+		blip_time_t period; // always recalculated before use; here for convenience
+		Blip_Buffer* output; // must stay the last field: reset() zeroes everything before it
+	};
+	osc_t oscs [osc_count];
+	Sap_Apu_Impl* impl;
+	blip_time_t last_time; // time that run_until() has already emulated up to
+	int poly5_pos;
+	int poly4_pos;
+	int polym_pos; // position in the 17-bit or 9-bit table, whichever control selects
+	int control;
+	
+	void calc_periods();
+	void run_until( blip_time_t );
+	
+	enum { poly4_len  = (1L <<  4) - 1 };
+	enum { poly9_len  = (1L <<  9) - 1 };
+	enum { poly17_len = (1L << 17) - 1 };
+	friend class Sap_Apu_Impl;
+};
+
+// Common tables and Blip_Synth that can be shared among multiple Sap_Apu objects
+class Sap_Apu_Impl {
+public:
+	Blip_Synth<blip_good_quality,1> synth;
+	
+	Sap_Apu_Impl(); // generates the poly4/poly9/poly17 tables
+	void volume( double d ) { synth.volume( 1.0 / Sap_Apu::osc_count / 30 * d ); }
+	
+private:
+	typedef unsigned char byte;
+	byte poly4  [Sap_Apu::poly4_len  / 8 + 1]; // bit-packed tables: one bit per step, eight steps per byte
+	byte poly9  [Sap_Apu::poly9_len  / 8 + 1];
+	byte poly17 [Sap_Apu::poly17_len / 8 + 1];
+	friend class Sap_Apu;
+};
+
+inline void Sap_Apu::osc_output( int i, Blip_Buffer* b )
+{
+	assert( (unsigned) i < osc_count ); // the unsigned cast rejects negative indices as well
+	oscs [i].output = b;
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Sap_Cpu.cpp b/libraries/game-music-emu/gme/Sap_Cpu.cpp
new file mode 100644
index 000000000..76ae277ad
--- /dev/null
+++ b/libraries/game-music-emu/gme/Sap_Cpu.cpp
@@ -0,0 +1,1004 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Sap_Cpu.h"
+
+#include <limits.h>
+#include "blargg_endian.h"
+
+//#include "nes_cpu_log.h"
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#define FLUSH_TIME()    (void) (s.time = s_time)
+#define CACHE_TIME()    (void) (s_time = s.time)
+
+#include "sap_cpu_io.h"
+
+#ifndef CPU_DONE
+	#define CPU_DONE( cpu, time, result_out )   { result_out = -1; }
+#endif
+
+#include "blargg_source.h"
+
+int const st_n = 0x80;
+int const st_v = 0x40;
+int const st_r = 0x20;
+int const st_b = 0x10;
+int const st_d = 0x08;
+int const st_i = 0x04;
+int const st_z = 0x02;
+int const st_c = 0x01;
+
+void Sap_Cpu::reset( void* new_mem )
+{
+	check( state == &state_ ); // run() points state at a local copy, so this flags a reset from inside run()
+	state = &state_;
+	mem = (uint8_t*) new_mem;
+	r.status = st_i;
+	r.sp = 0xFF;
+	r.pc = 0;
+	r.a  = 0;
+	r.x  = 0;
+	r.y  = 0;
+	state_.time = 0;
+	state_.base = 0;
+	irq_time_ = future_sap_time;
+	end_time_ = future_sap_time;
+	
+	blargg_verify_byte_order();
+}
+
+#define TIME                    (s_time + s.base)
+#define READ( addr )            CPU_READ( this, (addr), TIME )
+#define WRITE( addr, data )     {CPU_WRITE( this, (addr), (data), TIME );}
+#define READ_LOW( addr )        (mem [int (addr)])
+#define WRITE_LOW( addr, data ) (void) (READ_LOW( addr ) = (data))
+#define READ_PROG( addr )       (READ_LOW( addr ))
+
+#define SET_SP( v )     (sp = ((v) + 1) | 0x100)
+#define GET_SP()        ((sp - 1) & 0xFF)
+#define PUSH( v )       ((sp = (sp - 1) | 0x100), WRITE_LOW( sp, v ))
+
+bool Sap_Cpu::run( sap_time_t end_time )
+{
+	bool illegal_encountered = false;
+	set_end_time( end_time );
+	state_t s = this->state_;
+	this->state = &s;
+	int32_t s_time = s.time;
+	uint8_t* const mem = this->mem; // cache
+	
+	// registers
+	uint16_t pc = r.pc;
+	uint8_t a = r.a;
+	uint8_t x = r.x;
+	uint8_t y = r.y;
+	uint16_t sp;
+	SET_SP( r.sp );
+	
+	// status flags
+	#define IS_NEG (nz & 0x8080)
+	
+	#define CALC_STATUS( out ) do {\
+		out = status & (st_v | st_d | st_i);\
+		out |= ((nz >> 8) | nz) & st_n;\
+		out |= c >> 8 & st_c;\
+		if ( !(nz & 0xFF) ) out |= st_z;\
+	} while ( 0 )
+
+	#define SET_STATUS( in ) do {\
+		status = in & (st_v | st_d | st_i);\
+		nz = in << 8;\
+		c = nz;\
+		nz |= ~in & st_z;\
+	} while ( 0 )
+	
+	uint8_t status;
+	uint16_t c;  // carry set if (c & 0x100) != 0
+	uint16_t nz; // Z set if (nz & 0xFF) == 0, N set if (nz & 0x8080) != 0
+	{
+		uint8_t temp = r.status;
+		SET_STATUS( temp );
+	}
+	
+	goto loop;
+dec_clock_loop:
+	s_time--;
+loop:
+	
+	#ifndef NDEBUG
+	{
+		sap_time_t correct = end_time_;
+		if ( !(status & st_i) && correct > irq_time_ )
+			correct = irq_time_;
+		check( s.base == correct );
+	}
+	#endif
+	
+	check( (unsigned) GET_SP() < 0x100 );
+	check( (unsigned) a < 0x100 );
+	check( (unsigned) x < 0x100 );
+	check( (unsigned) y < 0x100 );
+	
+	uint8_t opcode = mem [pc];
+	pc++;
+	uint8_t const* instr = mem + pc;
+	
+	static uint8_t const clock_table [256] =
+	{// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+		0,6,2,8,3,3,5,5,3,2,2,2,4,4,6,6,// 0
+		3,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// 1
+		6,6,2,8,3,3,5,5,4,2,2,2,4,4,6,6,// 2
+		3,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// 3
+		6,6,2,8,3,3,5,5,3,2,2,2,3,4,6,6,// 4
+		3,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// 5
+		6,6,2,8,3,3,5,5,4,2,2,2,5,4,6,6,// 6
+		3,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// 7
+		2,6,2,6,3,3,3,3,2,2,2,2,4,4,4,4,// 8
+		3,6,2,6,4,4,4,4,2,5,2,5,5,5,5,5,// 9
+		2,6,2,6,3,3,3,3,2,2,2,2,4,4,4,4,// A
+		3,5,2,5,4,4,4,4,2,4,2,4,4,4,4,4,// B
+		2,6,2,8,3,3,5,5,2,2,2,2,4,4,6,6,// C
+		3,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// D
+		2,6,2,8,3,3,5,5,2,2,2,2,4,4,6,6,// E
+		3,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7 // F
+	}; // 0x00 was 7
+	
+	uint16_t data;
+	data = clock_table [opcode];
+	if ( (s_time += data) >= 0 )
+		goto possibly_out_of_time;
+almost_out_of_time:
+	
+	data = *instr;
+	
+	#ifdef NES_CPU_LOG_H
+		nes_cpu_log( "cpu_log", pc - 1, opcode, instr [0], instr [1] );
+	#endif
+	
+	switch ( opcode )
+	{
+possibly_out_of_time:
+		if ( s_time < (int) data )
+			goto almost_out_of_time;
+		s_time -= data;
+		goto out_of_time;
+
+// Macros
+
+#define GET_MSB()   (instr [1])
+#define ADD_PAGE()  (pc++, data += 0x100 * GET_MSB())
+#define GET_ADDR()  GET_LE16( instr )
+
+#define NO_PAGE_CROSSING( lsb )
+#define HANDLE_PAGE_CROSSING( lsb ) s_time += (lsb) >> 8;
+
+#define INC_DEC_XY( reg, n ) reg = uint8_t (nz = reg + n); goto loop;
+
+#define IND_Y( cross, out ) {\
+		uint16_t temp = READ_LOW( data ) + y;\
+		out = temp + 0x100 * READ_LOW( uint8_t (data + 1) );\
+		cross( temp );\
+	}
+	
+#define IND_X( out ) {\
+		uint16_t temp = data + x;\
+		out = 0x100 * READ_LOW( uint8_t (temp + 1) ) + READ_LOW( uint8_t (temp) );\
+	}
+	
+#define ARITH_ADDR_MODES( op )\
+case op - 0x04: /* (ind,x) */\
+	IND_X( data )\
+	goto ptr##op;\
+case op + 0x0C: /* (ind),y */\
+	IND_Y( HANDLE_PAGE_CROSSING, data )\
+	goto ptr##op;\
+case op + 0x10: /* zp,X */\
+	data = uint8_t (data + x);\
+case op + 0x00: /* zp */\
+	data = READ_LOW( data );\
+	goto imm##op;\
+case op + 0x14: /* abs,Y */\
+	data += y;\
+	goto ind##op;\
+case op + 0x18: /* abs,X */\
+	data += x;\
+ind##op:\
+	HANDLE_PAGE_CROSSING( data );\
+case op + 0x08: /* abs */\
+	ADD_PAGE();\
+ptr##op:\
+	FLUSH_TIME();\
+	data = READ( data );\
+	CACHE_TIME();\
+case op + 0x04: /* imm */\
+imm##op:
+
+// TODO: more efficient way to handle negative branch that wraps PC around
+#define BRANCH( cond )\
+{\
+	int16_t offset = (int8_t) data;\
+	uint16_t extra_clock = (++pc & 0xFF) + offset;\
+	if ( !(cond) ) goto dec_clock_loop;\
+	pc += offset;\
+	s_time += extra_clock >> 8 & 1;\
+	goto loop;\
+}
+
+// Often-Used
+
+	case 0xB5: // LDA zp,x
+		a = nz = READ_LOW( uint8_t (data + x) );
+		pc++;
+		goto loop;
+	
+	case 0xA5: // LDA zp
+		a = nz = READ_LOW( data );
+		pc++;
+		goto loop;
+	
+	case 0xD0: // BNE
+		BRANCH( (uint8_t) nz );
+	
+	case 0x20: { // JSR
+		uint16_t temp = pc + 1;
+		pc = GET_ADDR();
+		WRITE_LOW( 0x100 | (sp - 1), temp >> 8 );
+		sp = (sp - 2) | 0x100;
+		WRITE_LOW( sp, temp );
+		goto loop;
+	}
+	
+	case 0x4C: // JMP abs
+		pc = GET_ADDR();
+		goto loop;
+	
+	case 0xE8: // INX
+		INC_DEC_XY( x, 1 )
+	
+	case 0x10: // BPL
+		BRANCH( !IS_NEG )
+	
+	ARITH_ADDR_MODES( 0xC5 ) // CMP
+		nz = a - data;
+		pc++;
+		c = ~nz;
+		nz &= 0xFF;
+		goto loop;
+	
+	case 0x30: // BMI
+		BRANCH( IS_NEG )
+	
+	case 0xF0: // BEQ
+		BRANCH( !(uint8_t) nz );
+	
+	case 0x95: // STA zp,x
+		data = uint8_t (data + x);
+	case 0x85: // STA zp
+		pc++;
+		WRITE_LOW( data, a );
+		goto loop;
+	
+	case 0xC8: // INY
+		INC_DEC_XY( y, 1 )
+
+	case 0xA8: // TAY
+		y  = a;
+		nz = a;
+		goto loop;
+	
+	case 0x98: // TYA
+		a  = y;
+		nz = y;
+		goto loop;
+	
+	case 0xAD:{// LDA abs
+		unsigned addr = GET_ADDR();
+		pc += 2;
+		nz = READ( addr );
+		a = nz;
+		goto loop;
+	}
+	
+	case 0x60: // RTS
+		pc = 1 + READ_LOW( sp );
+		pc += 0x100 * READ_LOW( 0x100 | (sp - 0xFF) );
+		sp = (sp - 0xFE) | 0x100;
+		goto loop;
+	
+	{
+		uint16_t addr;
+		
+	case 0x99: // STA abs,Y
+		addr = y + GET_ADDR();
+		pc += 2;
+		if ( addr <= 0x7FF )
+		{
+			WRITE_LOW( addr, a );
+			goto loop;
+		}
+		goto sta_ptr;
+	
+	case 0x8D: // STA abs
+		addr = GET_ADDR();
+		pc += 2;
+		if ( addr <= 0x7FF )
+		{
+			WRITE_LOW( addr, a );
+			goto loop;
+		}
+		goto sta_ptr;
+	
+	case 0x9D: // STA abs,X (slightly more common than STA abs)
+		addr = x + GET_ADDR();
+		pc += 2;
+		if ( addr <= 0x7FF )
+		{
+			WRITE_LOW( addr, a );
+			goto loop;
+		}
+	sta_ptr:
+		FLUSH_TIME();
+		WRITE( addr, a );
+		CACHE_TIME();
+		goto loop;
+		
+	case 0x91: // STA (ind),Y
+		IND_Y( NO_PAGE_CROSSING, addr )
+		pc++;
+		goto sta_ptr;
+	
+	case 0x81: // STA (ind,X)
+		IND_X( addr )
+		pc++;
+		goto sta_ptr;
+	
+	}
+	
+	case 0xA9: // LDA #imm
+		pc++;
+		a  = data;
+		nz = data;
+		goto loop;
+
+	// common read instructions
+	{
+		uint16_t addr;
+		
+	case 0xA1: // LDA (ind,X)
+		IND_X( addr )
+		pc++;
+		goto a_nz_read_addr;
+	
+	case 0xB1:// LDA (ind),Y
+		addr = READ_LOW( data ) + y;
+		HANDLE_PAGE_CROSSING( addr );
+		addr += 0x100 * READ_LOW( (uint8_t) (data + 1) );
+		pc++;
+		a = nz = READ_PROG( addr );
+		if ( (addr ^ 0x8000) <= 0x9FFF )
+			goto loop;
+		goto a_nz_read_addr;
+	
+	case 0xB9: // LDA abs,Y
+		HANDLE_PAGE_CROSSING( data + y );
+		addr = GET_ADDR() + y;
+		pc += 2;
+		a = nz = READ_PROG( addr );
+		if ( (addr ^ 0x8000) <= 0x9FFF )
+			goto loop;
+		goto a_nz_read_addr;
+	
+	case 0xBD: // LDA abs,X
+		HANDLE_PAGE_CROSSING( data + x );
+		addr = GET_ADDR() + x;
+		pc += 2;
+		a = nz = READ_PROG( addr );
+		if ( (addr ^ 0x8000) <= 0x9FFF )
+			goto loop;
+	a_nz_read_addr:
+		FLUSH_TIME();
+		a = nz = READ( addr );
+		CACHE_TIME();
+		goto loop;
+	
+	}
+
+// Branch
+
+	case 0x50: // BVC
+		BRANCH( !(status & st_v) )
+	
+	case 0x70: // BVS
+		BRANCH( status & st_v )
+	
+	case 0xB0: // BCS
+		BRANCH( c & 0x100 )
+	
+	case 0x90: // BCC
+		BRANCH( !(c & 0x100) )
+	
+// Load/store
+	
+	case 0x94: // STY zp,x
+		data = uint8_t (data + x);
+	case 0x84: // STY zp
+		pc++;
+		WRITE_LOW( data, y );
+		goto loop;
+	
+	case 0x96: // STX zp,y
+		data = uint8_t (data + y);
+	case 0x86: // STX zp
+		pc++;
+		WRITE_LOW( data, x );
+		goto loop;
+	
+	case 0xB6: // LDX zp,y
+		data = uint8_t (data + y);
+	case 0xA6: // LDX zp
+		data = READ_LOW( data );
+	case 0xA2: // LDX #imm
+		pc++;
+		x = data;
+		nz = data;
+		goto loop;
+	
+	case 0xB4: // LDY zp,x
+		data = uint8_t (data + x);
+	case 0xA4: // LDY zp
+		data = READ_LOW( data );
+	case 0xA0: // LDY #imm
+		pc++;
+		y = data;
+		nz = data;
+		goto loop;
+	
+	case 0xBC: // LDY abs,X
+		data += x;
+		HANDLE_PAGE_CROSSING( data );
+	case 0xAC:{// LDY abs
+		unsigned addr = data + 0x100 * GET_MSB();
+		pc += 2;
+		FLUSH_TIME();
+		y = nz = READ( addr );
+		CACHE_TIME();
+		goto loop;
+	}
+	
+	case 0xBE: // LDX abs,y
+		data += y;
+		HANDLE_PAGE_CROSSING( data );
+	case 0xAE:{// LDX abs
+		unsigned addr = data + 0x100 * GET_MSB();
+		pc += 2;
+		FLUSH_TIME();
+		x = nz = READ( addr );
+		CACHE_TIME();
+		goto loop;
+	}
+	
+	{
+		uint8_t temp;
+	case 0x8C: // STY abs
+		temp = y;
+		goto store_abs;
+	
+	case 0x8E: // STX abs
+		temp = x;
+	store_abs:
+		unsigned addr = GET_ADDR();
+		pc += 2;
+		if ( addr <= 0x7FF )
+		{
+			WRITE_LOW( addr, temp );
+			goto loop;
+		}
+		FLUSH_TIME();
+		WRITE( addr, temp );
+		CACHE_TIME();
+		goto loop;
+	}
+
+// Compare
+
+	case 0xEC:{// CPX abs
+		unsigned addr = GET_ADDR();
+		pc++;
+		FLUSH_TIME();
+		data = READ( addr );
+		CACHE_TIME();
+		goto cpx_data;
+	}
+	
+	case 0xE4: // CPX zp
+		data = READ_LOW( data );
+	case 0xE0: // CPX #imm
+	cpx_data:
+		nz = x - data;
+		pc++;
+		c = ~nz;
+		nz &= 0xFF;
+		goto loop;
+	
+	case 0xCC:{// CPY abs
+		unsigned addr = GET_ADDR();
+		pc++;
+		FLUSH_TIME();
+		data = READ( addr );
+		CACHE_TIME();
+		goto cpy_data;
+	}
+	
+	case 0xC4: // CPY zp
+		data = READ_LOW( data );
+	case 0xC0: // CPY #imm
+	cpy_data:
+		nz = y - data;
+		pc++;
+		c = ~nz;
+		nz &= 0xFF;
+		goto loop;
+	
+// Logical
+
+	ARITH_ADDR_MODES( 0x25 ) // AND
+		nz = (a &= data);
+		pc++;
+		goto loop;
+	
+	ARITH_ADDR_MODES( 0x45 ) // EOR
+		nz = (a ^= data);
+		pc++;
+		goto loop;
+	
+	ARITH_ADDR_MODES( 0x05 ) // ORA
+		nz = (a |= data);
+		pc++;
+		goto loop;
+	
+	case 0x2C:{// BIT abs
+		unsigned addr = GET_ADDR();
+		pc += 2;
+		status &= ~st_v;
+		nz = READ( addr );
+		status |= nz & st_v;
+		if ( a & nz )
+			goto loop;
+		nz <<= 8; // result must be zero, even if N bit is set
+		goto loop;
+	}
+	
+	case 0x24: // BIT zp
+		nz = READ_LOW( data );
+		pc++;
+		status &= ~st_v;
+		status |= nz & st_v;
+		if ( a & nz )
+			goto loop;
+		nz <<= 8; // result must be zero, even if N bit is set
+		goto loop;
+		
+// Add/subtract
+
+	ARITH_ADDR_MODES( 0xE5 ) // SBC
+	case 0xEB: // unofficial equivalent
+		data ^= 0xFF;
+		goto adc_imm;
+	
+	ARITH_ADDR_MODES( 0x65 ) // ADC
+	adc_imm: {
+		check( !(status & st_d) );
+		int16_t carry = c >> 8 & 1;
+		int16_t ov = (a ^ 0x80) + carry + (int8_t) data; // sign-extend
+		status &= ~st_v;
+		status |= ov >> 2 & 0x40;
+		c = nz = a + data + carry;
+		pc++;
+		a = (uint8_t) nz;
+		goto loop;
+	}
+	
+// Shift/rotate
+
+	case 0x4A: // LSR A
+		c = 0;
+	case 0x6A: // ROR A
+		nz = c >> 1 & 0x80;
+		c = a << 8;
+		nz |= a >> 1;
+		a = nz;
+		goto loop;
+
+	case 0x0A: // ASL A
+		nz = a << 1;
+		c = nz;
+		a = (uint8_t) nz;
+		goto loop;
+
+	case 0x2A: { // ROL A
+		nz = a << 1;
+		int16_t temp = c >> 8 & 1;
+		c = nz;
+		nz |= temp;
+		a = (uint8_t) nz;
+		goto loop;
+	}
+	
+	case 0x5E: // LSR abs,X
+		data += x;
+	case 0x4E: // LSR abs
+		c = 0;
+	case 0x6E: // ROR abs
+	ror_abs: {
+		ADD_PAGE();
+		FLUSH_TIME();
+		int temp = READ( data );
+		nz = (c >> 1 & 0x80) | (temp >> 1);
+		c = temp << 8;
+		goto rotate_common;
+	}
+	
+	case 0x3E: // ROL abs,X
+		data += x;
+		goto rol_abs;
+	
+	case 0x1E: // ASL abs,X
+		data += x;
+	case 0x0E: // ASL abs
+		c = 0;
+	case 0x2E: // ROL abs
+	rol_abs:
+		ADD_PAGE();
+		nz = c >> 8 & 1;
+		FLUSH_TIME();
+		nz |= (c = READ( data ) << 1);
+	rotate_common:
+		pc++;
+		WRITE( data, (uint8_t) nz );
+		CACHE_TIME();
+		goto loop;
+	
+	case 0x7E: // ROR abs,X
+		data += x;
+		goto ror_abs;
+	
+	case 0x76: // ROR zp,x
+		data = uint8_t (data + x);
+		goto ror_zp;
+	
+	case 0x56: // LSR zp,x
+		data = uint8_t (data + x);
+	case 0x46: // LSR zp
+		c = 0;
+	case 0x66: // ROR zp
+	ror_zp: {
+		int temp = READ_LOW( data );
+		nz = (c >> 1 & 0x80) | (temp >> 1);
+		c = temp << 8;
+		goto write_nz_zp;
+	}
+	
+	case 0x36: // ROL zp,x
+		data = uint8_t (data + x);
+		goto rol_zp;
+	
+	case 0x16: // ASL zp,x
+		data = uint8_t (data + x);
+	case 0x06: // ASL zp
+		c = 0;
+	case 0x26: // ROL zp
+	rol_zp:
+		nz = c >> 8 & 1;
+		nz |= (c = READ_LOW( data ) << 1);
+		goto write_nz_zp;
+	
+// Increment/decrement
+
+	case 0xCA: // DEX
+		INC_DEC_XY( x, -1 )
+	
+	case 0x88: // DEY
+		INC_DEC_XY( y, -1 )
+	
+	case 0xF6: // INC zp,x
+		data = uint8_t (data + x);
+	case 0xE6: // INC zp
+		nz = 1;
+		goto add_nz_zp;
+	
+	case 0xD6: // DEC zp,x
+		data = uint8_t (data + x);
+	case 0xC6: // DEC zp
+		nz = (uint16_t) -1;
+	add_nz_zp:
+		nz += READ_LOW( data );
+	write_nz_zp:
+		pc++;
+		WRITE_LOW( data, nz );
+		goto loop;
+	
+	case 0xFE: // INC abs,x
+		data = x + GET_ADDR();
+		goto inc_ptr;
+	
+	case 0xEE: // INC abs
+		data = GET_ADDR();
+	inc_ptr:
+		nz = 1;
+		goto inc_common;
+	
+	case 0xDE: // DEC abs,x
+		data = x + GET_ADDR();
+		goto dec_ptr;
+	
+	case 0xCE: // DEC abs
+		data = GET_ADDR();
+	dec_ptr:
+		nz = (uint16_t) -1;
+	inc_common:
+		FLUSH_TIME();
+		nz += READ( data );
+		pc += 2;
+		WRITE( data, (uint8_t) nz );
+		CACHE_TIME();
+		goto loop;
+		
+// Transfer
+
+	case 0xAA: // TAX
+		x  = a;
+		nz = a;
+		goto loop;
+		
+	case 0x8A: // TXA
+		a  = x;
+		nz = x;
+		goto loop;
+
+	case 0x9A: // TXS
+		SET_SP( x ); // verified (no flag change)
+		goto loop;
+	
+	case 0xBA: // TSX
+		x = nz = GET_SP();
+		goto loop;
+	
+// Stack
+	
+	case 0x48: // PHA
+		PUSH( a ); // verified
+		goto loop;
+		
+	case 0x68: // PLA
+		a = nz = READ_LOW( sp );
+		sp = (sp - 0xFF) | 0x100;
+		goto loop;
+		
+	case 0x40:{// RTI
+		uint8_t temp = READ_LOW( sp );
+		pc  = READ_LOW( 0x100 | (sp - 0xFF) );
+		pc |= READ_LOW( 0x100 | (sp - 0xFE) ) * 0x100;
+		sp = (sp - 0xFD) | 0x100;
+		data = status;
+		SET_STATUS( temp );
+		this->r.status = status; // update externally-visible I flag
+		if ( (data ^ status) & st_i )
+		{
+			sap_time_t new_time = end_time_;
+			if ( !(status & st_i) && new_time > irq_time_ )
+				new_time = irq_time_;
+			blargg_long delta = s.base - new_time;
+			s.base = new_time;
+			s_time += delta;
+		}
+		goto loop;
+	}
+	
+	case 0x28:{// PLP
+		uint8_t temp = READ_LOW( sp );
+		sp = (sp - 0xFF) | 0x100;
+		uint8_t changed = status ^ temp;
+		SET_STATUS( temp );
+		if ( !(changed & st_i) )
+			goto loop; // I flag didn't change
+		if ( status & st_i )
+			goto handle_sei;
+		goto handle_cli;
+	}
+	
+	case 0x08: { // PHP
+		uint8_t temp;
+		CALC_STATUS( temp );
+		PUSH( temp | (st_b | st_r) );
+		goto loop;
+	}
+	
+	case 0x6C:{// JMP (ind)
+		data = GET_ADDR();
+		pc = READ_PROG( data );
+		data = (data & 0xFF00) | ((data + 1) & 0xFF);
+		pc |= 0x100 * READ_PROG( data );
+		goto loop;
+	}
+	
+	case 0x00: // BRK
+		goto handle_brk;
+	
+// Flags
+
+	case 0x38: // SEC
+		c = (uint16_t) ~0;
+		goto loop;
+	
+	case 0x18: // CLC
+		c = 0;
+		goto loop;
+		
+	case 0xB8: // CLV
+		status &= ~st_v;
+		goto loop;
+	
+	case 0xD8: // CLD
+		status &= ~st_d;
+		goto loop;
+	
+	case 0xF8: // SED
+		status |= st_d;
+		goto loop;
+	
+	case 0x58: // CLI
+		if ( !(status & st_i) )
+			goto loop;
+		status &= ~st_i;
+	handle_cli: {
+		this->r.status = status; // update externally-visible I flag
+		blargg_long delta = s.base - irq_time_;
+		if ( delta <= 0 )
+		{
+			if ( TIME < irq_time_ )
+				goto loop;
+			goto delayed_cli;
+		}
+		s.base = irq_time_;
+		s_time += delta;
+		if ( s_time < 0 )
+			goto loop;
+		
+		if ( delta >= s_time + 1 )
+		{
+			// delayed irq until after next instruction
+			s.base += s_time + 1;
+			s_time = -1;
+			irq_time_ = s.base; // TODO: remove, as only to satisfy debug check in loop
+			goto loop;
+		}
+	delayed_cli:
+		debug_printf( "Delayed CLI not emulated\n" );
+		goto loop;
+	}
+	
+	case 0x78: // SEI
+		if ( status & st_i )
+			goto loop;
+		status |= st_i;
+	handle_sei: {
+		this->r.status = status; // update externally-visible I flag
+		blargg_long delta = s.base - end_time_;
+		s.base = end_time_;
+		s_time += delta;
+		if ( s_time < 0 )
+			goto loop;
+		debug_printf( "Delayed SEI not emulated\n" );
+		goto loop;
+	}
+	
+// Unofficial
+	
+	// SKW - Skip word
+	case 0x1C: case 0x3C: case 0x5C: case 0x7C: case 0xDC: case 0xFC:
+		HANDLE_PAGE_CROSSING( data + x );
+	case 0x0C:
+		pc++;
+	// SKB - Skip byte
+	case 0x74: case 0x04: case 0x14: case 0x34: case 0x44: case 0x54: case 0x64:
+	case 0x80: case 0x82: case 0x89: case 0xC2: case 0xD4: case 0xE2: case 0xF4:
+		pc++;
+		goto loop;
+	
+	// NOP
+	case 0xEA: case 0x1A: case 0x3A: case 0x5A: case 0x7A: case 0xDA: case 0xFA:
+		goto loop;
+	
+// Unimplemented
+	
+	// halt
+	//case 0x02: case 0x12: case 0x22: case 0x32: case 0x42: case 0x52:
+	//case 0x62: case 0x72: case 0x92: case 0xB2: case 0xD2: case 0xF2:
+	
+	default:
+		illegal_encountered = true;
+		pc--;
+		goto stop;
+	}
+	assert( false );
+	
+	int result_;
+handle_brk:
+	if ( (pc - 1) >= idle_addr )
+		goto idle_done;
+	pc++;
+	result_ = 4;
+	debug_printf( "BRK executed\n" );
+	
+interrupt:
+	{
+		s_time += 7;
+		
+		WRITE_LOW( 0x100 | (sp - 1), pc >> 8 );
+		WRITE_LOW( 0x100 | (sp - 2), pc );
+		pc = GET_LE16( &READ_PROG( 0xFFFA ) + result_ );
+		
+		sp = (sp - 3) | 0x100;
+		uint8_t temp;
+		CALC_STATUS( temp );
+		temp |= st_r;
+		if ( result_ )
+			temp |= st_b; // TODO: incorrectly sets B flag for IRQ
+		WRITE_LOW( sp, temp );
+		
+		status &= ~st_d;
+		status |= st_i;
+		this->r.status = status; // update externally-visible I flag
+		
+		blargg_long delta = s.base - end_time_;
+		s.base = end_time_;
+		s_time += delta;
+		goto loop;
+	}
+	
+idle_done:
+	//s_time = 0;
+	pc--;
+	goto stop;
+out_of_time:
+	pc--;
+	FLUSH_TIME();
+	CPU_DONE( this, TIME, result_ );
+	CACHE_TIME();
+	if ( result_ >= 0 )
+		goto interrupt;
+	if ( s_time < 0 )
+		goto loop;
+	
+stop:
+	
+	s.time = s_time;
+	
+	r.pc = pc;
+	r.sp = GET_SP();
+	r.a = a;
+	r.x = x;
+	r.y = y;
+	
+	{
+		uint8_t temp;
+		CALC_STATUS( temp );
+		r.status = temp;
+	}
+	
+	this->state_ = s;
+	this->state = &this->state_;
+	
+	return illegal_encountered;
+}
+
diff --git a/libraries/game-music-emu/gme/Sap_Cpu.h b/libraries/game-music-emu/gme/Sap_Cpu.h
new file mode 100644
index 000000000..fdfb9a310
--- /dev/null
+++ b/libraries/game-music-emu/gme/Sap_Cpu.h
@@ -0,0 +1,81 @@
+// Atari 6502 CPU emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef SAP_CPU_H
+#define SAP_CPU_H
+
+#include "blargg_common.h"
+
+typedef blargg_long sap_time_t; // clock cycle count
+typedef unsigned sap_addr_t; // 16-bit address
+enum { future_sap_time = INT_MAX / 2 + 1 };
+
+class Sap_Cpu {
+public:
+	// Clear all registers and keep pointer to 64K memory passed in
+	void reset( void* mem_64k );
+	
+	// Run until specified time is reached. Returns true if suspicious/unsupported
+	// instruction was encountered at any point during run.
+	bool run( sap_time_t end_time );
+	
+	// Registers are not updated until run() returns (except I flag in status)
+	struct registers_t {
+		uint16_t pc;     // program counter
+		uint8_t a;       // accumulator
+		uint8_t x;       // X index register
+		uint8_t y;       // Y index register
+		uint8_t status;  // processor flags; bit 0x04 is the I flag (see irq_inhibit)
+		uint8_t sp;      // low byte of stack pointer; the stack byte lives at 0x100 + sp
+	};
+	registers_t r;
+	
+	enum { idle_addr = 0xFEFF }; // sentinel: Sap_Emu pushes idle_addr - 1 as return address and treats pc == idle_addr as "routine finished"
+	
+	// Time of beginning of next instruction to be executed (absolute = relative time + base)
+	sap_time_t time() const             { return state->time + state->base; }
+	void set_time( sap_time_t t )       { state->time = t - state->base; }
+	void adjust_time( int delta )       { state->time += delta; }
+	
+	sap_time_t irq_time() const         { return irq_time_; }
+	void set_irq_time( sap_time_t ); // stores the IRQ time and rebases; time() is unchanged
+	
+	sap_time_t end_time() const         { return end_time_; }
+	void set_end_time( sap_time_t ); // stores the end time and rebases; time() is unchanged
+	
+public:
+	Sap_Cpu() { state = &state_; } // only points state at state_; nothing else is initialized here
+	enum { irq_inhibit = 0x04 }; // mask of the I flag in r.status, tested by update_end_time()
+private:
+	struct state_t {
+		sap_time_t base; // absolute time that 'time' is measured from
+		sap_time_t time; // clocks relative to base
+	};
+	state_t* state; // points to state_ or a local copy within run()
+	state_t state_;
+	sap_time_t irq_time_; // last value given to set_irq_time()
+	sap_time_t end_time_; // last value given to set_end_time()
+	uint8_t* mem; // presumably the 64K block handed to reset() - confirm in Sap_Cpu.cpp
+	
+	inline sap_time_t update_end_time( sap_time_t end, sap_time_t irq ); // moves state->base; returns old base minus new base
+};
+
+inline sap_time_t Sap_Cpu::update_end_time( sap_time_t t, sap_time_t irq )
+{
+	sap_time_t const target = (irq < t && !(r.status & irq_inhibit)) ? irq : t; // an earlier IRQ wins only while the I flag is clear
+	sap_time_t const shift = state->base - target;
+	state->base = target;
+	return shift; // callers add this to state->time so that time + base stays the same
+}
+
+inline void Sap_Cpu::set_irq_time( sap_time_t t ) // set time of the next IRQ
+{
+	state->time += update_end_time( end_time_, (irq_time_ = t) ); // store t, rebase, then add the base shift so time() is unchanged
+}
+
+inline void Sap_Cpu::set_end_time( sap_time_t t ) // set the end time that run() works toward
+{
+	state->time += update_end_time( (end_time_ = t), irq_time_ ); // store t, rebase, then add the base shift so time() is unchanged
+}
+
+#endif
diff --git a/libraries/game-music-emu/gme/Sap_Emu.cpp b/libraries/game-music-emu/gme/Sap_Emu.cpp
new file mode 100644
index 000000000..dc5d666d6
--- /dev/null
+++ b/libraries/game-music-emu/gme/Sap_Emu.cpp
@@ -0,0 +1,443 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Sap_Emu.h"
+
+#include "blargg_endian.h"
+#include <string.h>
+
+/* Copyright (C) 2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+long const base_scanline_period = 114; // clocks per scanline at tempo 1.0; set_tempo_() divides this by the tempo
+
+Sap_Emu::Sap_Emu() // registers the emulator type, voice names/types and silence lookahead
+{
+	set_type( gme_sap_type );
+	
+	static const char* const names [Sap_Apu::osc_count * 2] = { // two chips' worth; load_mem_() exposes the second half only for stereo files
+		"Wave 1", "Wave 2", "Wave 3", "Wave 4",
+		"Wave 5", "Wave 6", "Wave 7", "Wave 8",
+	};
+	set_voice_names( names );
+	
+	static int const types [Sap_Apu::osc_count * 2] = { // NOTE(review): meaning of the low index bits is defined by the base class - confirm there
+		wave_type | 1, wave_type | 2, wave_type | 3, wave_type | 0,
+		wave_type | 5, wave_type | 6, wave_type | 7, wave_type | 4,
+	};
+	set_voice_types( types );
+	set_silence_lookahead( 6 );
+}
+
+Sap_Emu::~Sap_Emu() { } // no explicit cleanup is performed here
+
+// Track info
+
+// Maps an ASCII hex digit (either case) to 0-15; any other byte value gives 16 or greater
+inline int from_hex_char( int h )
+{
+	int const digit = h - 0x30;
+	if ( (unsigned) digit <= 9 )
+		return digit;
+	return ((digit - 0x11) & 0xDF) + 10; // 0x11 is 'A' - '0'; clearing bit 0x20 folds 'a'-'f' onto 'A'-'F'
+}
+
+static long from_hex( byte const* in ) // reads exactly four hex digits at in; -1 if any of them is not a hex digit
+{
+	unsigned value = 0;
+	for ( int i = 0; i < 4; i++ )
+	{
+		int const digit = from_hex_char( in [i] );
+		if ( digit >= 16 )
+			return -1;
+		value = (value << 4) + digit;
+	}
+	return value;
+}
+
+static int from_dec( byte const* in, byte const* end ) // parses [in, end) as unsigned decimal; -1 if empty, malformed or too large for int
+{
+	if ( in >= end )
+		return -1;
+	
+	int n = 0;
+	while ( in < end )
+	{
+		int dig = *in++ - '0';
+		if ( (unsigned) dig > 9 || n > (INT_MAX - dig) / 10 ) // range test runs only for a real digit, so INT_MAX - dig cannot overflow
+			return -1; // non-digit, or n * 10 + dig would exceed INT_MAX (signed overflow is undefined behavior)
+		n = n * 10 + dig;
+	}
+	return n;
+}
+
+static void parse_string( byte const* in, byte const* end, int len, char* out )
+{
+	// Copies the value starting at in into out, truncated to len - 1 characters and always
+	// NUL-terminated. A leading double quote is dropped and the text then stops at the next quote.
+	byte const* first = in;
+	byte const* last  = end;
+	if ( *in == '\"' )
+	{
+		first = in + 1;
+		for ( last = first; last < end && *last != '\"'; )
+			last++;
+	}
+	
+	int const count = min( len - 1, int (last - first) );
+	memcpy( out, first, count );
+	out [count] = 0;
+}
+
+static blargg_err_t parse_info( byte const* in, long size, Sap_Emu::info_t* out ) // parses the SAP text header into *out; returns 0 or an error string
+{
+	out->track_count   = 1; // only these four fields get defaults here; callers preset the rest (see Sap_Emu::load_mem_)
+	out->author    [0] = 0;
+	out->name      [0] = 0;
+	out->copyright [0] = 0;
+	
+	if ( size < 16 || memcmp( in, "SAP\x0D\x0A", 5 ) ) // file must begin with "SAP" CR LF
+		return gme_wrong_file_type;
+	
+	byte const* file_end = in + size - 5; // stops 5 bytes short of the real end; presumably slack for the fixed-size reads below
+	in += 5;
+	while ( in < file_end && (in [0] != 0xFF || in [1] != 0xFF) ) // one header line per pass, until the FF FF marker
+	{
+		byte const* line_end = in;
+		while ( line_end < file_end && *line_end != 0x0D ) // find the CR that ends this line
+			line_end++;
+		
+		char const* tag = (char const*) in;
+		while ( in < line_end && *in > ' ' ) // tag is the leading run of bytes above space
+			in++;
+		int tag_len = (char const*) in - tag;
+		
+		while ( in < line_end && *in <= ' ' ) in++; // skip separator; in now points at the value (or line_end)
+		
+		if ( tag_len <= 0 )
+		{
+			// skip line
+		}
+		else if ( !strncmp( "INIT", tag, tag_len ) ) // NOTE: only tag_len chars are compared, so a tag that is a proper prefix of a keyword also matches
+		{
+			out->init_addr = from_hex( in );
+			if ( (unsigned long) out->init_addr > 0xFFFF ) // also catches the -1 returned for bad hex
+				return "Invalid init address";
+		}
+		else if ( !strncmp( "PLAYER", tag, tag_len ) )
+		{
+			out->play_addr = from_hex( in );
+			if ( (unsigned long) out->play_addr > 0xFFFF )
+				return "Invalid play address";
+		}
+		else if ( !strncmp( "MUSIC", tag, tag_len ) )
+		{
+			out->music_addr = from_hex( in );
+			if ( (unsigned long) out->music_addr > 0xFFFF )
+				return "Invalid music address";
+		}
+		else if ( !strncmp( "SONGS", tag, tag_len ) )
+		{
+			out->track_count = from_dec( in, line_end );
+			if ( out->track_count <= 0 )
+				return "Invalid track count";
+		}
+		else if ( !strncmp( "TYPE", tag, tag_len ) )
+		{
+			switch ( out->type = *in ) // type is the first character of the value
+			{
+			case 'C':
+			case 'B':
+				break;
+			
+			case 'D':
+				return "Digimusic not supported";
+			
+			default:
+				return "Unsupported player type";
+			}
+		}
+		else if ( !strncmp( "STEREO", tag, tag_len ) )
+		{
+			out->stereo = true;
+		}
+		else if ( !strncmp( "FASTPLAY", tag, tag_len ) )
+		{
+			out->fastplay = from_dec( in, line_end );
+			if ( out->fastplay <= 0 )
+				return "Invalid fastplay value";
+		}
+		else if ( !strncmp( "AUTHOR", tag, tag_len ) )
+		{
+			parse_string( in, line_end, sizeof out->author, out->author );
+		}
+		else if ( !strncmp( "NAME", tag, tag_len ) )
+		{
+			parse_string( in, line_end, sizeof out->name, out->name );
+		}
+		else if ( !strncmp( "DATE", tag, tag_len ) ) // the DATE value is stored in the copyright field
+		{
+			parse_string( in, line_end, sizeof out->copyright, out->copyright );
+		}
+		
+		in = line_end + 2; // step over the CR and the byte after it (LF)
+	}
+	
+	if ( in [0] != 0xFF || in [1] != 0xFF ) // loop ended without reaching the FF FF marker
+		return "ROM data missing";
+	out->rom_data = in + 2; // binary data starts right after the marker
+	
+	return 0;
+}
+
+static void copy_sap_fields( Sap_Emu::info_t const& in, track_info_t* out ) // copies the parsed header strings into *out
+{
+	Gme_File::copy_field_( out->game,      in.name ); // the SAP NAME value is reported as the game field
+	Gme_File::copy_field_( out->author,    in.author );
+	Gme_File::copy_field_( out->copyright, in.copyright ); // filled from the DATE tag by parse_info()
+}
+
+blargg_err_t Sap_Emu::track_info_( track_info_t* out, int ) const // track index is ignored: every track gets the same header fields
+{
+	copy_sap_fields( info, out );
+	return 0;
+}
+
+struct Sap_File : Gme_Info_ // info-only reader: parses the header for track count and text fields, no emulation
+{
+	Sap_Emu::info_t info; // only the fields parse_info() writes are meaningful here; nothing presets the others
+	
+	Sap_File() { set_type( gme_sap_type ); }
+	
+	blargg_err_t load_mem_( byte const* begin, long size ) // parse header and publish the track count
+	{
+		RETURN_ERR( parse_info( begin, size, &info ) );
+		set_track_count( info.track_count );
+		return 0;
+	}
+	
+	blargg_err_t track_info_( track_info_t* out, int ) const // track index is ignored: same fields for every track
+	{
+		copy_sap_fields( info, out );
+		return 0;
+	}
+};
+
+static Music_Emu* new_sap_emu () { return BLARGG_NEW Sap_Emu ; } // factory for the full emulator
+static Music_Emu* new_sap_file() { return BLARGG_NEW Sap_File; } // factory for the info-only reader
+
+static gme_type_t_ const gme_sap_type_ = { "Atari XL", 0, &new_sap_emu, &new_sap_file, "SAP", 1 }; // NOTE(review): field meanings come from gme_type_t_ - confirm there
+BLARGG_EXPORT extern gme_type_t const gme_sap_type = &gme_sap_type_;
+
+// Setup
+
+blargg_err_t Sap_Emu::load_mem_( byte const* in, long size ) // parses the header and configures voices and buffer; data blocks are loaded later by start_track_()
+{
+	file_end = in + size; // kept for the data-block loader in start_track_()
+	
+	info.warning    = 0; // defaults for everything parse_info() does not initialize itself
+	info.type       = 'B';
+	info.stereo     = false;
+	info.init_addr  = -1; // -1 stays in place when the corresponding tag is absent
+	info.play_addr  = -1;
+	info.music_addr = -1;
+	info.fastplay   = 312; // used when the file has no FASTPLAY tag
+	RETURN_ERR( parse_info( in, size, &info ) );
+	
+	set_warning( info.warning );
+	set_track_count( info.track_count );
+	set_voice_count( Sap_Apu::osc_count << info.stereo ); // doubled for stereo files
+	apu_impl.volume( gain() );
+	
+	return setup_buffer( 1773447 ); // presumably the clock rate in Hz - confirm against Classic_Emu::setup_buffer
+}
+
+void Sap_Emu::update_eq( blip_eq_t const& eq ) // forwards the treble equalization to the synth in apu_impl
+{
+	apu_impl.synth.treble_eq( eq );
+}
+
+void Sap_Emu::set_voice( int i, Blip_Buffer* center, Blip_Buffer* left, Blip_Buffer* right )
+{
+	// First-chip voices go to left (stereo files) or center; voices past osc_count are the second chip's and go to right
+	if ( i < Sap_Apu::osc_count )
+		apu.osc_output( i, (info.stereo ? left : center) );
+	else
+		apu2.osc_output( i - Sap_Apu::osc_count, right );
+}
+
+// Emulation
+
+void Sap_Emu::set_tempo_( double t ) // higher tempo gives a shorter scanline and so a shorter play_period()
+{
+	scanline_period = sap_time_t (base_scanline_period / t); // conversion to sap_time_t drops the fraction
+}
+
+inline sap_time_t Sap_Emu::play_period() const { return info.fastplay * scanline_period; } // clocks between play calls: fastplay scanlines
+
+void Sap_Emu::cpu_jsr( sap_addr_t addr ) // points the CPU at addr with idle_addr - 1 pushed as the return address
+{
+	check( r.sp >= 0xFE ); // catch anything trying to leave data on stack
+	r.pc = addr;
+	int high_byte = (idle_addr - 1) >> 8;
+	if ( r.sp == 0xFE && mem.ram [0x1FF] == high_byte )
+		r.sp = 0xFF; // pop extra byte off
+	mem.ram [0x100 + r.sp--] = high_byte; // some routines use RTI to return (RTI pulls three bytes, hence this extra push)
+	mem.ram [0x100 + r.sp--] = high_byte; // return address, high byte
+	mem.ram [0x100 + r.sp--] = (idle_addr - 1) & 0xFF; // return address, low byte
+}
+
+void Sap_Emu::run_routine( sap_addr_t addr ) // calls a 6502 routine and runs the CPU until it returns or the time limit is hit
+{
+	cpu_jsr( addr );
+	cpu::run( 312 * base_scanline_period * 60 ); // time limit; presumably 60 frames of 312 scanlines - confirm
+	check( r.pc == idle_addr ); // debug check that the routine actually returned
+}
+
+inline void Sap_Emu::call_init( int track ) // runs the init sequence for the file's player type; other types do nothing
+{
+	switch ( info.type )
+	{
+	case 'B': // one call to INIT with the track number in A
+		r.a = track;
+		run_routine( info.init_addr );
+		break;
+	
+	case 'C': // two calls to PLAYER + 3
+		r.a = 0x70; // first call: A = 0x70, X/Y = low/high byte of the MUSIC address
+		r.x = info.music_addr&0xFF;
+		r.y = info.music_addr >> 8;
+		run_routine( info.play_addr + 3 );
+		r.a = 0; // second call: A = 0, X = track number
+		r.x = track;
+		run_routine( info.play_addr + 3 );
+		break;
+	}
+}
+
+blargg_err_t Sap_Emu::start_track_( int track ) // reloads the data blocks into cleared memory, resets chips and CPU, runs init
+{
+	RETURN_ERR( Classic_Emu::start_track_( track ) );
+	
+	memset( &mem, 0, sizeof mem ); // clears the padding as well as the RAM image
+
+	byte const* in = info.rom_data;
+	while ( file_end - in >= 5 ) // a block needs a 4-byte header plus at least one data byte
+	{
+		unsigned start = get_le16( in ); // first address of the block
+		unsigned end   = get_le16( in + 2 ); // last address, inclusive
+		//debug_printf( "Block $%04X-$%04X\n", start, end );
+		in += 4;
+		if ( end < start )
+		{
+			set_warning( "Invalid file data block" );
+			break;
+		}
+		long len = end - start + 1;
+		if ( len > file_end - in ) // block claims more data than the file holds
+		{
+			set_warning( "Invalid file data block" );
+			break;
+		}
+		
+		memcpy( mem.ram + start, in, len );
+		in += len;
+		if ( file_end - in >= 2 && in [0] == 0xFF && in [1] == 0xFF ) // optional FF FF marker before the next block
+			in += 2;
+	}
+	
+	apu.reset( &apu_impl );
+	apu2.reset( &apu_impl );
+	cpu::reset( mem.ram );
+	time_mask = 0; // disables sound during init
+	call_init( track );
+	time_mask = -1; // all bits set: chip writes use the real time again
+	
+	next_play = play_period(); // first play call happens one period in
+	
+	return 0;
+}
+
+// Emulation
+
+// see sap_cpu_io.h for read/write functions
+
+void Sap_Emu::cpu_write_( sap_addr_t addr, int data ) // routes a CPU write to the first chip, the second chip (stereo only), or logs it
+{
+	if ( (addr ^ Sap_Apu::start_addr) <= (Sap_Apu::end_addr - Sap_Apu::start_addr) ) // addr lies in the first chip's register range
+	{
+		GME_APU_HOOK( this, addr - Sap_Apu::start_addr, data );
+		apu.write_data( time() & time_mask, addr, data ); // time_mask is 0 during init, which pins the write time to 0
+		return;
+	}
+	
+	if ( (addr ^ (Sap_Apu::start_addr + 0x10)) <= (Sap_Apu::end_addr - Sap_Apu::start_addr) &&
+			info.stereo ) // second chip sits 0x10 above the first and exists only for stereo files
+	{
+		GME_APU_HOOK( this, addr - 0x10 - Sap_Apu::start_addr + 10, data );
+		apu2.write_data( time() & time_mask, addr ^ 0x10, data ); // clear the 0x10 offset so apu2 sees first-chip addresses
+		return;
+	}
+
+	if ( (addr & ~0x0010) != 0xD20F || data != 0x03 ) // writes of 0x03 to 0xD20F/0xD21F are ignored silently; anything else is logged
+		debug_printf( "Unmapped write $%04X <- $%02X\n", addr, data );
+}
+
+inline void Sap_Emu::call_play()
+{
+	// Sets up a call to the player's periodic entry point: PLAYER itself for
+	// type 'B' files, PLAYER + 6 for type 'C'. Any other type does nothing.
+	if ( info.type == 'B' )
+	{
+		cpu_jsr( info.play_addr );
+	}
+	else if ( info.type == 'C' )
+	{
+		cpu_jsr( info.play_addr + 6 );
+	}
+}
+
+blargg_err_t Sap_Emu::run_clocks( blip_time_t& duration, int ) // emulates one frame; duration is updated to the time actually reached
+{
+	set_time( 0 );
+	while ( time() < duration )
+	{
+		if ( cpu::run( duration ) || r.pc > idle_addr ) // run() reports an illegal instruction; pc beyond idle_addr is treated the same
+			return "Emulation error (illegal instruction)";
+		
+		if ( r.pc == idle_addr ) // the routine started by cpu_jsr() has returned
+		{
+			if ( next_play <= duration ) // another play call is due within this frame
+			{
+				set_time( next_play );
+				next_play += play_period();
+				call_play();
+				GME_FRAME_HOOK( this );
+			}
+			else
+			{
+				set_time( duration ); // nothing left to do: idle to the end of the frame
+			}
+		}
+	}
+	
+	duration = time();
+	next_play -= duration; // make next_play relative to the start of the next frame
+	check( next_play >= 0 );
+	if ( next_play < 0 )
+		next_play = 0;
+	apu.end_frame( duration );
+	if ( info.stereo )
+		apu2.end_frame( duration );
+	
+	return 0;
+}
diff --git a/libraries/game-music-emu/gme/Sap_Emu.h b/libraries/game-music-emu/gme/Sap_Emu.h
new file mode 100644
index 000000000..f75312713
--- /dev/null
+++ b/libraries/game-music-emu/gme/Sap_Emu.h
@@ -0,0 +1,68 @@
+// Atari XL/XE SAP music file emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef SAP_EMU_H
+#define SAP_EMU_H
+
+#include "Classic_Emu.h"
+#include "Sap_Apu.h"
+#include "Sap_Cpu.h"
+
+class Sap_Emu : private Sap_Cpu, public Classic_Emu {
+	typedef Sap_Cpu cpu; // lets the implementation write cpu::run(), cpu::reset()
+public:
+	static gme_type_t static_type() { return gme_sap_type; }
+public:
+	Sap_Emu();
+	~Sap_Emu();
+	struct info_t { // header fields filled in by parse_info() in Sap_Emu.cpp
+		byte const* rom_data; // first byte after the FF FF marker that ends the text header
+		const char* warning;
+		long init_addr;   // INIT tag; load_mem_() presets -1
+		long play_addr;   // PLAYER tag; load_mem_() presets -1
+		long music_addr;  // MUSIC tag; load_mem_() presets -1
+		int  type;        // TYPE tag character; 'B' and 'C' are accepted
+		int  track_count; // SONGS tag; defaults to 1
+		int  fastplay;    // FASTPLAY tag: scanlines between play calls; load_mem_() presets 312
+		bool stereo;      // set when a STEREO tag is present
+		char author    [256];
+		char name      [256];
+		char copyright [ 32]; // filled from the DATE tag
+	};
+protected:
+	blargg_err_t track_info_( track_info_t*, int track ) const;
+	blargg_err_t load_mem_( byte const*, long );
+	blargg_err_t start_track_( int );
+	blargg_err_t run_clocks( blip_time_t&, int );
+	void set_tempo_( double );
+	void set_voice( int, Blip_Buffer*, Blip_Buffer*, Blip_Buffer* );
+	void update_eq( blip_eq_t const& );
+public: private: friend class Sap_Cpu;
+	int cpu_read( sap_addr_t );
+	void cpu_write( sap_addr_t, int );
+	void cpu_write_( sap_addr_t, int ); // handles writes to the sound chip ranges and logs unmapped ones
+private:
+	info_t info;
+	
+	byte const* file_end; // end of the loaded file, set by load_mem_()
+	sap_time_t scanline_period; // clocks per scanline after tempo scaling
+	sap_time_t next_play; // time of the next play call, relative to the current frame
+	sap_time_t time_mask; // ANDed with time() on chip writes; 0 during init, -1 afterwards
+	Sap_Apu apu;  // first sound chip
+	Sap_Apu apu2; // second sound chip, used only when info.stereo is set
+	
+	// large items
+	struct {
+		byte padding1 [0x100];
+		byte ram [0x10000 + 0x100]; // 64K image plus 0x100 extra bytes
+	} mem;
+	Sap_Apu_Impl apu_impl; // shared by both chips (passed to apu.reset() and apu2.reset())
+	
+	sap_time_t play_period() const;
+	void call_play();
+	void cpu_jsr( sap_addr_t );
+	void call_init( int track );
+	void run_routine( sap_addr_t );
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Sms_Apu.cpp b/libraries/game-music-emu/gme/Sms_Apu.cpp
new file mode 100644
index 000000000..b41fdec41
--- /dev/null
+++ b/libraries/game-music-emu/gme/Sms_Apu.cpp
@@ -0,0 +1,330 @@
+// Sms_Snd_Emu 0.1.4. http://www.slack.net/~ant/
+
+#include "Sms_Apu.h"
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+// Sms_Osc
+
+Sms_Osc::Sms_Osc()
+{
+	// Start with no buffers attached. Slot 0 is never assigned afterwards, so it
+	// always stays NULL; slots 1-3 are filled in later by Sms_Apu::osc_output().
+	for ( int i = 0; i < 4; i++ )
+		outputs [i] = 0;
+	output = 0;
+}
+
+void Sms_Osc::reset()
+{
+	output_select = 3; // slot 3 holds the center buffer (see Sms_Apu::osc_output)
+	output = outputs [output_select];
+	volume = 0;
+	last_amp = 0;
+	delay = 0;
+}
+
+// Sms_Square
+
+inline void Sms_Square::reset()
+{
+	Sms_Osc::reset(); // shared oscillator state
+	phase  = 0;       // square-specific state
+	period = 0;
+}
+
+void Sms_Square::run( blip_time_t time, blip_time_t end_time ) // generates square output for [time, end_time) and carries the remainder in delay
+{
+	if ( !volume || period <= 128 )
+	{
+		// ignore 16kHz and higher
+		if ( last_amp ) // bring the output back to zero once
+		{
+			synth->offset( time, -last_amp, output );
+			last_amp = 0;
+		}
+		time += delay;
+		if ( !period )
+		{
+			time = end_time; // period 0: phase does not advance
+		}
+		else if ( time < end_time )
+		{
+			// keep calculating phase
+			int count = (end_time - time + period - 1) / period; // toggles needed to reach or pass end_time (rounded up)
+			phase = (phase + count) & 1;
+			time += count * period;
+		}
+	}
+	else
+	{
+		int amp = phase ? volume : -volume;
+		{
+			int delta = amp - last_amp; // correct for any volume change since the last run
+			if ( delta )
+			{
+				last_amp = amp;
+				synth->offset( time, delta, output );
+			}
+		}
+		
+		time += delay;
+		if ( time < end_time )
+		{
+			Blip_Buffer* const output = this->output; // local copies for the inner loop
+			int delta = amp * 2; // each toggle swings between +volume and -volume
+			do
+			{
+				delta = -delta;
+				synth->offset_inline( time, delta, output );
+				time += period;
+				phase ^= 1;
+			}
+			while ( time < end_time );
+			this->last_amp = phase ? volume : -volume;
+		}
+	}
+	delay = time - end_time; // clocks past end_time until the next toggle
+}
+
+// Sms_Noise
+
+static int const noise_periods [3] = { 0x100, 0x200, 0x400 }; // periods for noise rate selects 0-2; select 3 follows square 2's period (see write_data)
+
+inline void Sms_Noise::reset()
+{
+	Sms_Osc::reset();                 // shared oscillator state
+	feedback = 0x9000;                // noise-specific state
+	shifter  = 0x8000;
+	period   = &noise_periods [0];
+}
+
+void Sms_Noise::run( blip_time_t time, blip_time_t end_time ) // generates noise output for [time, end_time) and carries the remainder in delay
+{
+	int amp = volume;
+	if ( shifter & 1 ) // output polarity follows bit 0 of the shift register
+		amp = -amp;
+	
+	{
+		int delta = amp - last_amp; // correct for any volume change since the last run
+		if ( delta )
+		{
+			last_amp = amp;
+			synth.offset( time, delta, output );
+		}
+	}
+	
+	time += delay;
+	if ( !volume )
+		time = end_time; // silent: the shift register is not stepped at all
+	
+	if ( time < end_time )
+	{
+		Blip_Buffer* const output = this->output; // local copies for the inner loop
+		unsigned shifter = this->shifter;
+		int delta = amp * 2;
+		int period = *this->period * 2;
+		if ( !period )
+			period = 16; // fallback when the referenced period is 0
+		
+		do
+		{
+			int changed = shifter + 1;
+			shifter = (feedback & -(shifter & 1)) ^ (shifter >> 1); // shift right; XOR in feedback when the bit shifted out was 1
+			if ( changed & 2 ) // true if bits 0 and 1 differ
+			{
+				delta = -delta;
+				synth.offset_inline( time, delta, output );
+			}
+			time += period;
+		}
+		while ( time < end_time );
+		
+		this->shifter = shifter;
+		this->last_amp = delta >> 1; // delta is twice the current amplitude
+	}
+	delay = time - end_time; // clocks past end_time until the next step
+}
+
+// Sms_Apu
+
+Sms_Apu::Sms_Apu() // wires up the oscillator table, sets full volume and resets
+{
+	for ( int i = 0; i < 3; i++ )
+	{
+		squares [i].synth = &square_synth; // all three squares share one synth
+		oscs [i] = &squares [i];
+	}
+	oscs [3] = &noise; // index 3 is the noise channel
+	
+	volume( 1.0 );
+	reset();
+}
+
+Sms_Apu::~Sms_Apu() // no explicit cleanup is performed here
+{
+}
+
+void Sms_Apu::volume( double vol ) // sets overall volume, where 1.0 is full volume
+{
+	vol *= 0.85 / (osc_count * 64 * 2); // 64 is the largest entry of the volumes table; presumably 2 covers the +/- swing - confirm
+	square_synth.volume( vol );
+	noise.synth.volume( vol );
+}
+
+void Sms_Apu::treble_eq( const blip_eq_t& eq ) // applies the same treble equalization to the square and noise synths
+{
+	square_synth.treble_eq( eq );
+	noise.synth.treble_eq( eq );
+}
+
+void Sms_Apu::osc_output( int index, Blip_Buffer* center, Blip_Buffer* left, Blip_Buffer* right ) // assigns one oscillator's three buffers
+{
+	require( (unsigned) index < osc_count );
+	require( (center && left && right) || (!center && !left && !right) ); // all three buffers or none
+	Sms_Osc& osc = *oscs [index];
+	osc.outputs [1] = right; // slot numbers match output_select: 1 = right, 2 = left, 3 = center
+	osc.outputs [2] = left;
+	osc.outputs [3] = center;
+	osc.output = osc.outputs [osc.output_select]; // re-resolve the active buffer for the current selection
+}
+
+void Sms_Apu::output( Blip_Buffer* center, Blip_Buffer* left, Blip_Buffer* right ) // assigns the same three buffers to every oscillator
+{
+	for ( int i = 0; i < osc_count; i++ )
+		osc_output( i, center, left, right );
+}
+
+void Sms_Apu::reset( unsigned feedback, int noise_width ) // resets all oscillators and derives the two noise feedback masks
+{
+	last_time = 0;
+	latch = 0;
+	
+	if ( !feedback || !noise_width ) // either argument zero selects the defaults
+	{
+		feedback = 0x0009;
+		noise_width = 16;
+	}
+	// convert to "Galois configuration"
+	looped_feedback = 1 << (noise_width - 1); // single bit at the top of the register
+	noise_feedback  = 0;
+	while ( noise_width-- ) // reverse the low noise_width bits of feedback into noise_feedback
+	{
+		noise_feedback = (noise_feedback << 1) | (feedback & 1);
+		feedback >>= 1;
+	}
+	
+	squares [0].reset();
+	squares [1].reset();
+	squares [2].reset();
+	noise.reset();
+}
+
+void Sms_Apu::run_until( blip_time_t end_time ) // runs every oscillator that has an output buffer from last_time up to end_time
+{
+	require( end_time >= last_time ); // end_time must not be before previous time
+	
+	if ( end_time > last_time )
+	{
+		// run oscillators
+		for ( int i = 0; i < osc_count; ++i )
+		{
+			Sms_Osc& osc = *oscs [i];
+			if ( osc.output ) // oscillators without a buffer are skipped entirely
+			{
+				osc.output->set_modified();
+				if ( i < 3 ) // indices 0-2 are the squares, 3 is noise
+					squares [i].run( last_time, end_time );
+				else
+					noise.run( last_time, end_time );
+			}
+		}
+		
+		last_time = end_time;
+	}
+}
+
+void Sms_Apu::end_frame( blip_time_t end_time ) // runs up to end_time, then makes last_time relative to the next frame
+{
+	if ( end_time > last_time )
+		run_until( end_time );
+	
+	assert( last_time >= end_time );
+	last_time -= end_time; // the new frame starts at time 0
+}
+
+void Sms_Apu::write_ggstereo( blip_time_t time, int data ) // applies a left/right assignment byte to all oscillators at the given time
+{
+	require( (unsigned) data <= 0xFF );
+	
+	run_until( time ); // bring output up to date before the routing changes
+	
+	for ( int i = 0; i < osc_count; i++ )
+	{
+		Sms_Osc& osc = *oscs [i];
+		int flags = data >> i;
+		Blip_Buffer* old_output = osc.output;
+		osc.output_select = (flags >> 3 & 2) | (flags & 1); // data bit i enables right (1), bit i+4 enables left (2); both give center (3), neither gives NULL (0)
+		osc.output = osc.outputs [osc.output_select];
+		if ( osc.output != old_output && osc.last_amp ) // buffer changed while the oscillator was non-zero
+		{
+			if ( old_output )
+			{
+				old_output->set_modified();
+				square_synth.offset( time, -osc.last_amp, old_output ); // return the old buffer to zero
+			}
+			osc.last_amp = 0; // the next run() re-applies the amplitude on the new buffer
+		}
+	}
+}
+
+// volumes [i] = 64 * pow( 1.26, 15 - i ) / pow( 1.26, 15 )
+static unsigned char const volumes [16] = { // indexed by the low four bits of a volume write; 15 is silent
+	64, 50, 39, 31, 24, 19, 15, 12, 9, 7, 5, 4, 3, 2, 1, 0
+};
+
+void Sms_Apu::write_data( blip_time_t time, int data ) // handles one byte written to the data port at the given time
+{
+	require( (unsigned) data <= 0xFF );
+	
+	run_until( time ); // bring output up to date before registers change
+	
+	if ( data & 0x80 ) // bytes with the top bit set select the register for this and following writes
+		latch = data;
+	
+	int index = (latch >> 5) & 3; // latch bits 5-6: oscillator index
+	if ( latch & 0x10 ) // latch bit 4: volume write
+	{
+		oscs [index]->volume = volumes [data & 15];
+	}
+	else if ( index < 3 ) // square period write; period is stored shifted left by 4
+	{
+		Sms_Square& sq = squares [index];
+		if ( data & 0x80 )
+			sq.period = (sq.period & 0xFF00) | (data << 4 & 0x00FF); // latch byte carries the low 4 bits
+		else
+			sq.period = (sq.period & 0x00FF) | (data << 8 & 0x3F00); // following byte carries the upper 6 bits
+	}
+	else // noise control write
+	{
+		int select = data & 3;
+		if ( select < 3 )
+			noise.period = &noise_periods [select];
+		else
+			noise.period = &squares [2].period; // rate 3 follows square 2's period
+		
+		noise.feedback = (data & 0x04) ? noise_feedback : looped_feedback; // bit 2 picks the feedback mask derived in reset()
+		noise.shifter = 0x8000; // every noise control write restarts the shift register
+	}
+}
diff --git a/libraries/game-music-emu/gme/Sms_Apu.h b/libraries/game-music-emu/gme/Sms_Apu.h
new file mode 100644
index 000000000..3c11a9c3c
--- /dev/null
+++ b/libraries/game-music-emu/gme/Sms_Apu.h
@@ -0,0 +1,75 @@
+// Sega Master System SN76489 PSG sound chip emulator
+
+// Sms_Snd_Emu 0.1.4
+#ifndef SMS_APU_H
+#define SMS_APU_H
+
+#include "Sms_Oscs.h"
+
+class Sms_Apu {
+public:
+	// Set overall volume of all oscillators, where 1.0 is full volume
+	void volume( double );
+	
+	// Set treble equalization
+	void treble_eq( const blip_eq_t& );
+	
+	// Outputs can be assigned to a single buffer for mono output, or to three
+	// buffers for stereo output (using Stereo_Buffer to do the mixing).
+	
+	// Assign all oscillator outputs to specified buffer(s). If buffer
+	// is NULL, silences all oscillators.
+	void output( Blip_Buffer* mono );
+	void output( Blip_Buffer* center, Blip_Buffer* left, Blip_Buffer* right );
+	
+	// Assign single oscillator output to buffer(s). Valid indices are 0 to 3,
+	// which refer to Square 1, Square 2, Square 3, and Noise. If buffer is NULL,
+	// silences oscillator.
+	enum { osc_count = 4 };
+	void osc_output( int index, Blip_Buffer* mono );
+	void osc_output( int index, Blip_Buffer* center, Blip_Buffer* left, Blip_Buffer* right );
+	
+	// Reset oscillators and internal state
+	void reset( unsigned noise_feedback = 0, int noise_width = 0 );
+	
+	// Write GameGear left/right assignment byte (data must be 0 to 0xFF)
+	void write_ggstereo( blip_time_t, int );
+	
+	// Write to data port (data must be 0 to 0xFF)
+	void write_data( blip_time_t, int );
+	
+	// Run all oscillators up to specified time, end current frame, then
+	// start a new frame at time 0.
+	void end_frame( blip_time_t );
+
+public:
+	Sms_Apu();
+	~Sms_Apu();
+private:
+	// noncopyable
+	Sms_Apu( const Sms_Apu& );
+	Sms_Apu& operator = ( const Sms_Apu& );
+	// NOTE(review): oscs [] presumably points at squares [0..2] and noise - confirm in constructor
+	Sms_Osc*    oscs [osc_count];
+	Sms_Square  squares [3];
+	Sms_Square::Synth square_synth; // used by squares
+	blip_time_t last_time;
+	int         latch;
+	Sms_Noise   noise;
+	unsigned    noise_feedback;
+	unsigned    looped_feedback;
+	// Advances emulation to the given time; called before each register write and by end_frame
+	void run_until( blip_time_t );
+};
+
+struct sms_apu_state_t // NOTE(review): not referenced by the code visible here; presumably a register snapshot - confirm
+{
+	unsigned char regs [8] [2];
+	unsigned char latch;
+};
+
+inline void Sms_Apu::output( Blip_Buffer* b ) { output( b, b, b ); }
+// Mono overloads: the same buffer is passed as center, left and right
+inline void Sms_Apu::osc_output( int i, Blip_Buffer* b ) { osc_output( i, b, b, b ); }
+
+#endif
diff --git a/libraries/game-music-emu/gme/Sms_Oscs.h b/libraries/game-music-emu/gme/Sms_Oscs.h
new file mode 100644
index 000000000..2a896fef3
--- /dev/null
+++ b/libraries/game-music-emu/gme/Sms_Oscs.h
@@ -0,0 +1,49 @@
+// Private oscillators used by Sms_Apu
+
+// Sms_Snd_Emu 0.1.4
+#ifndef SMS_OSCS_H
+#define SMS_OSCS_H
+
+#include "blargg_common.h"
+#include "Blip_Buffer.h"
+
+struct Sms_Osc
+{
+	Blip_Buffer* outputs [4]; // NULL, right, left, center
+	Blip_Buffer* output; // currently selected entry of outputs []
+	int output_select; // index into outputs [], set by Sms_Apu::write_ggstereo
+	// last_amp: amplitude currently contributed to output (write_ggstereo removes it when output changes)
+	int delay;
+	int last_amp;
+	int volume;
+	
+	Sms_Osc();
+	void reset();
+};
+
+struct Sms_Square : Sms_Osc
+{
+	int period; // Sms_Apu::write_data stores the 10-bit tone value shifted left 4 bits
+	int phase;
+	// NOTE(review): synth presumably points at Sms_Apu::square_synth ("used by squares") - confirm
+	typedef Blip_Synth<blip_good_quality,1> Synth;
+	const Synth* synth;
+	
+	void reset();
+	void run( blip_time_t, blip_time_t );
+};
+
+struct Sms_Noise : Sms_Osc
+{
+	const int* period; // Sms_Apu::write_data points this at a noise_periods entry or at squares [2].period
+	unsigned shifter; // reloaded with 0x8000 on every noise control write
+	unsigned feedback; // one of the APU's two feedback values, chosen by bit 2 of the control write
+	// The synth is held by value here, unlike the pointer in Sms_Square
+	typedef Blip_Synth<blip_med_quality,1> Synth;
+	Synth synth;
+	
+	void reset();
+	void run( blip_time_t, blip_time_t );
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Snes_Spc.cpp b/libraries/game-music-emu/gme/Snes_Spc.cpp
new file mode 100644
index 000000000..0b2077d8c
--- /dev/null
+++ b/libraries/game-music-emu/gme/Snes_Spc.cpp
@@ -0,0 +1,380 @@
+// SPC emulation support: init, sample buffering, reset, SPC loading
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Snes_Spc.h"
+
+#include <string.h>
+
+/* Copyright (C) 2004-2007 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+#define RAM         (m.ram.ram)
+#define REGS        (m.smp_regs [0])
+#define REGS_IN     (m.smp_regs [1])
+// REGS_IN holds the values that read back (see load_regs); REGS holds the loaded values
+// (n ? n : 256) for n in 0-255; any other n is first reduced modulo 256
+#define IF_0_THEN_256( n ) ((uint8_t) ((n) - 1) + 1)
+
+
+//// Init
+
+blargg_err_t Snes_Spc::init()
+{
+	memset( &m, 0, sizeof m );
+	dsp.init( RAM );
+	// Normal playback speed
+	m.tempo = tempo_unit;
+	
+	// Most SPC music doesn't need ROM, and almost all the rest only rely
+	// on these two bytes
+	m.rom [0x3E] = 0xFF;
+	m.rom [0x3F] = 0xC0;
+	
+	static unsigned char const cycle_table [128] =
+	{//   01   23   45   67   89   AB   CD   EF
+	    0x28,0x47,0x34,0x36,0x26,0x54,0x54,0x68, // 0
+	    0x48,0x47,0x45,0x56,0x55,0x65,0x22,0x46, // 1
+	    0x28,0x47,0x34,0x36,0x26,0x54,0x54,0x74, // 2
+	    0x48,0x47,0x45,0x56,0x55,0x65,0x22,0x38, // 3
+	    0x28,0x47,0x34,0x36,0x26,0x44,0x54,0x66, // 4
+	    0x48,0x47,0x45,0x56,0x55,0x45,0x22,0x43, // 5
+	    0x28,0x47,0x34,0x36,0x26,0x44,0x54,0x75, // 6
+	    0x48,0x47,0x45,0x56,0x55,0x55,0x22,0x36, // 7
+	    0x28,0x47,0x34,0x36,0x26,0x54,0x52,0x45, // 8
+	    0x48,0x47,0x45,0x56,0x55,0x55,0x22,0xC5, // 9
+	    0x38,0x47,0x34,0x36,0x26,0x44,0x52,0x44, // A
+	    0x48,0x47,0x45,0x56,0x55,0x55,0x22,0x34, // B
+	    0x38,0x47,0x45,0x47,0x25,0x64,0x52,0x49, // C
+	    0x48,0x47,0x56,0x67,0x45,0x55,0x22,0x83, // D
+	    0x28,0x47,0x34,0x36,0x24,0x53,0x43,0x40, // E
+	    0x48,0x47,0x45,0x56,0x34,0x54,0x22,0x60, // F
+	};
+	// Each byte above packs two 4-bit entries, high nibble first
+	// unpack cycle table into one entry per byte (256 entries)
+	for ( int i = 0; i < 128; i++ )
+	{
+		int n = cycle_table [i];
+		m.cycle_table [i * 2 + 0] = n >> 4;
+		m.cycle_table [i * 2 + 1] = n & 0x0F;
+	}
+	
+	#if SPC_LESS_ACCURATE
+		memcpy( reg_times, reg_times_, sizeof reg_times );
+	#endif
+	// Enter the power-on state
+	reset();
+	return 0;
+}
+
+void Snes_Spc::init_rom( uint8_t const in [rom_size] )
+{
+	memcpy( &m.rom [0], &in [0], rom_size * sizeof m.rom [0] ); // replace the stored IPL ROM image
+}
+
+void Snes_Spc::set_tempo( int t )
+{
+	m.tempo = t;
+	int const timer2_shift = 4; // 64 kHz
+	int const other_shift  = 3; //  8 kHz
+	// Fixed tempo stores shift counts in prescaler; adjustable tempo stores divisors
+	#if SPC_DISABLE_TEMPO
+		m.timers [2].prescaler = timer2_shift;
+		m.timers [1].prescaler = timer2_shift + other_shift;
+		m.timers [0].prescaler = timer2_shift + other_shift;
+	#else
+		if ( !t )
+			t = 1; // avoid dividing by zero below
+		int const timer2_rate  = 1 << timer2_shift;
+		int rate = (timer2_rate * tempo_unit + (t >> 1)) / t; // adds t/2 before dividing
+		if ( rate < timer2_rate / 4 )
+			rate = timer2_rate / 4; // max 4x tempo
+		m.timers [2].prescaler = rate;
+		m.timers [1].prescaler = rate << other_shift;
+		m.timers [0].prescaler = rate << other_shift;
+	#endif
+}
+
+// Applies freshly loaded timer registers (targets, enable bits, output
+// counters) to all timers. Dividers are left untouched.
+void Snes_Spc::timers_loaded()
+{
+	int const control = REGS [r_control];
+	for ( int n = 0; n < timer_count; ++n )
+	{
+		Timer& timer = m.timers [n];
+		timer.enabled = (control >> n) & 1;
+		timer.period  = IF_0_THEN_256( REGS [r_t0target + n] );
+		timer.counter = REGS_IN [r_t0out + n] & 0x0F;
+	}
+	// Recompute the prescalers for the current tempo
+	set_tempo( m.tempo );
+}
+
+// Loads both register banks from the unified 16-byte register format
+void Snes_Spc::load_regs( uint8_t const in [reg_count] )
+{
+	// Registers that always read back as 0
+	static int const reads_zero [5] =
+		{ r_test, r_control, r_t0target, r_t1target, r_t2target };
+	
+	memcpy( REGS, in, reg_count );
+	memcpy( REGS_IN, REGS, reg_count );
+	
+	for ( int n = 0; n < 5; n++ )
+		REGS_IN [reads_zero [n]] = 0;
+}
+
+// Called after RAM has been filled from an SPC image: $F0-$FF hold the SMP
+// registers and timer counts, which are copied into the register banks.
+void Snes_Spc::ram_loaded()
+{
+	uint8_t* const regs_in_ram = RAM + 0xF0;
+	m.rom_enabled = 0;
+	load_regs( regs_in_ram );
+	// Put STOP instruction around memory to catch PC underflow/overflow
+	memset( &RAM [0x10000],      cpu_pad_fill, sizeof m.ram.padding1 );
+	memset( &m.ram.padding1 [0], cpu_pad_fill, sizeof m.ram.padding1 );
+}
+
+// Registers were just loaded. Applies the ROM-enable bit and the timer settings.
+void Snes_Spc::regs_loaded()
+{
+	enable_rom( REGS [r_control] & 0x80 ); // bit 7 of the control register maps the ROM
+	timers_loaded();
+}
+
+void Snes_Spc::reset_time_regs()
+{
+	m.cpu_error     = 0;
+	m.echo_accessed = 0;
+	m.spc_time      = 0;
+	m.dsp_time      = 0;
+	#if SPC_LESS_ACCURATE
+		m.dsp_time = clocks_per_sample + 1;
+	#endif
+	// Restart every timer's schedule and divider
+	for ( int i = 0; i < timer_count; i++ )
+	{
+		Timer* t = &m.timers [i];
+		t->next_time = 1;
+		t->divider   = 0;
+	}
+	// Re-apply the ROM mapping and timer registers from REGS
+	regs_loaded();
+	// Reset sample bookkeeping and the extra/output buffers
+	m.extra_clocks = 0;
+	reset_buf();
+}
+
+void Snes_Spc::reset_common( int timer_counter_init )
+{
+	int i;
+	for ( i = 0; i < timer_count; i++ )
+		REGS_IN [r_t0out + i] = timer_counter_init; // initial timer output counters (timers_loaded keeps the low 4 bits)
+	
+	// Clear CPU registers and start execution at the IPL ROM (rom_addr)
+	memset( &m.cpu_regs, 0, sizeof m.cpu_regs );
+	m.cpu_regs.pc = rom_addr;
+	// Test/control register defaults, then zero the port values the CPU reads
+	REGS [r_test   ] = 0x0A;
+	REGS [r_control] = 0xB0; // ROM enabled, clear ports
+	for ( i = 0; i < port_count; i++ )
+		REGS_IN [r_cpuio0 + i] = 0;
+	// Applies the registers set above and restarts timing
+	reset_time_regs();
+}
+
+void Snes_Spc::soft_reset()
+{
+	reset_common( 0 ); // timer output counters start at 0
+	dsp.soft_reset();
+}
+// Power-on reset: fills RAM with 0xFF and starts the timer output counters at 0x0F
+void Snes_Spc::reset()
+{
+	memset( RAM, 0xFF, 0x10000 );
+	ram_loaded();
+	reset_common( 0x0F );
+	dsp.reset();
+}
+
+char const Snes_Spc::signature [signature_size + 1] = // SPC file magic; load_spc() compares its first 27 bytes
+		"SNES-SPC700 Sound File Data v0.30\x1A\x1A";
+
+blargg_err_t Snes_Spc::load_spc( void const* data, long size )
+{
+	spc_file_t const* const spc = (spc_file_t const*) data;
+	
+	// be sure compiler didn't insert any padding into spc_file_t
+	assert( sizeof (spc_file_t) == spc_min_file_size + 0x80 );
+	
+	// Check signature and file size (only the first 27 signature bytes are compared)
+	if ( size < signature_size || memcmp( spc, signature, 27 ) )
+		return "Not an SPC file";
+	// Anything shorter cannot hold the header, the 64K RAM image and the DSP registers
+	if ( size < spc_min_file_size )
+		return "Corrupt SPC file";
+	
+	// CPU registers
+	m.cpu_regs.pc  = spc->pch * 0x100 + spc->pcl;
+	m.cpu_regs.a   = spc->a;
+	m.cpu_regs.x   = spc->x;
+	m.cpu_regs.y   = spc->y;
+	m.cpu_regs.psw = spc->psw;
+	m.cpu_regs.sp  = spc->sp;
+	
+	// RAM and registers
+	memcpy( RAM, spc->ram, 0x10000 );
+	ram_loaded();
+	
+	// DSP registers
+	dsp.load( spc->dsp );
+	// Apply the loaded registers and restart timers and sample buffering
+	reset_time_regs();
+	
+	return 0;
+}
+
+void Snes_Spc::clear_echo()
+{
+	// Nothing is cleared while bit 5 of the DSP FLG register is set
+	if ( dsp.read( Spc_Dsp::r_flg ) & 0x20 )
+		return;
+	int const first = 0x100 * dsp.read( Spc_Dsp::r_esa );
+	int const len   = 0x800 * (dsp.read( Spc_Dsp::r_edl ) & 0x0F);
+	// Clamp the region to the end of the 64K address space
+	int const last  = (first + len > 0x10000) ? 0x10000 : first + len;
+	memset( RAM + first, 0xFF, last - first );
+}
+
+
+//// Sample output
+
+void Snes_Spc::reset_buf()
+{
+	// Pre-fill the first half of the extra buffer with silence
+	int const silent = extra_size / 2;
+	for ( int n = 0; n < silent; n++ )
+		m.extra_buf [n] = 0;
+	
+	// Detach from any caller-supplied output buffer
+	m.buf_begin = 0;
+	m.extra_pos = m.extra_buf + silent;
+	dsp.set_output( 0, 0 );
+}
+
+void Snes_Spc::set_output( sample_t* out, int size )
+{
+	require( (size & 1) == 0 ); // size must be even
+	// Keep only the sub-sample remainder of extra_clocks, so sample_count() restarts at 0
+	m.extra_clocks &= clocks_per_sample - 1;
+	if ( out )
+	{
+		sample_t const* out_end = out + size;
+		m.buf_begin = out;
+		m.buf_end   = out_end;
+		
+		// Copy extra to output
+		sample_t const* in = m.extra_buf;
+		while ( in < m.extra_pos && out < out_end )
+			*out++ = *in++;
+		
+		// Handle output being full already
+		if ( out >= out_end )
+		{
+			// Have DSP write to remaining extra space
+			out     = dsp.extra();
+			out_end = &dsp.extra() [extra_size];
+			
+			// Copy any remaining extra samples as if DSP wrote them
+			while ( in < m.extra_pos )
+				*out++ = *in++;
+			assert( out <= out_end );
+		}
+		// The DSP continues writing right after whatever was copied above
+		dsp.set_output( out, out_end - out );
+	}
+	else
+	{
+		reset_buf();
+	}
+}
+
+void Snes_Spc::save_extra()
+{
+	// Get end pointers
+	sample_t const* main_end = m.buf_end;     // end of data written to buf
+	sample_t const* dsp_end  = dsp.out_pos(); // end of data written to dsp.extra()
+	if ( m.buf_begin <= dsp_end && dsp_end <= main_end )
+	{
+		main_end = dsp_end;
+		dsp_end  = dsp.extra(); // nothing in DSP's extra
+	}
+	// (a DSP position inside the caller's buffer means its extra area is unused)
+	// Copy any extra samples at these ends into extra_buf
+	sample_t* out = m.extra_buf;
+	sample_t const* in;
+	for ( in = m.buf_begin + sample_count(); in < main_end; in++ ) // samples beyond sample_count()
+		*out++ = *in;
+	for ( in = dsp.extra(); in < dsp_end ; in++ )
+		*out++ = *in;
+	
+	m.extra_pos = out;
+	assert( out <= &m.extra_buf [extra_size] );
+}
+
+blargg_err_t Snes_Spc::play( int count, sample_t* out )
+{
+	require( !(count & 1) ); // must be even
+	if ( count != 0 )
+	{
+		set_output( out, count );
+		end_frame( (clocks_per_sample / 2) * count );
+	}
+	// Hand back any pending CPU error once, then clear it
+	const char* pending = m.cpu_error;
+	m.cpu_error = 0;
+	return pending;
+}
+
+blargg_err_t Snes_Spc::skip( int count )
+{
+	#if SPC_LESS_ACCURATE
+	if ( count > 2 * sample_rate * 2 )
+	{
+		set_output( 0, 0 );
+		// Leave about one second (sample_rate pairs) for play() below
+		// Skip a multiple of 4 samples
+		time_t end = count;
+		count = (count & 3) + 1 * sample_rate * 2;
+		end = (end - count) * (clocks_per_sample / 2);
+		// Key on/off writes made while skipping are collected by dsp_write()
+		m.skipped_kon  = 0;
+		m.skipped_koff = 0;
+		
+		// Preserve DSP and timer synchronization
+		// TODO: verify that this really preserves it
+		int old_dsp_time = m.dsp_time + m.spc_time;
+		m.dsp_time = end - m.spc_time + skipping_time;
+		end_frame( end );
+		m.dsp_time = m.dsp_time - skipping_time + old_dsp_time;
+		// Issue the collected key events to the DSP now
+		dsp.write( Spc_Dsp::r_koff, m.skipped_koff & ~m.skipped_kon );
+		dsp.write( Spc_Dsp::r_kon , m.skipped_kon );
+		clear_echo();
+	}
+	#endif
+	// Play the remainder, discarding the samples (out is NULL)
+	return play( count, 0 );
+}
diff --git a/libraries/game-music-emu/gme/Snes_Spc.h b/libraries/game-music-emu/gme/Snes_Spc.h
new file mode 100644
index 000000000..68c780ab7
--- /dev/null
+++ b/libraries/game-music-emu/gme/Snes_Spc.h
@@ -0,0 +1,283 @@
+// SNES SPC-700 APU emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef SNES_SPC_H
+#define SNES_SPC_H
+
+#include "Spc_Dsp.h"
+#include "blargg_endian.h"
+
+#include <stdint.h>
+
+struct Snes_Spc {
+public:
+	// Must be called once before using
+	blargg_err_t init();
+	
+	// Sample pairs generated per second
+	enum { sample_rate = 32000 };
+	
+// Emulator use
+	
+	// Sets IPL ROM data. Library does not include ROM data. Most SPC music files
+	// don't need ROM, but a full emulator must provide this.
+	enum { rom_size = 0x40 };
+	void init_rom( uint8_t const rom [rom_size] );
+
+	// Sets destination for output samples
+	typedef short sample_t;
+	void set_output( sample_t* out, int out_size );
+
+	// Number of samples written to output since last set
+	int sample_count() const;
+
+	// Resets SPC to power-on state. This resets your output buffer, so you must
+	// call set_output() after this.
+	void reset();
+
+	// Emulates pressing reset switch on SNES. This resets your output buffer, so
+	// you must call set_output() after this.
+	void soft_reset();
+
+	// 1024000 SPC clocks per second, sample pair every 32 clocks
+	typedef int time_t;
+	enum { clock_rate = 1024000 };
+	enum { clocks_per_sample = 32 };
+	
+	// Emulated port read/write at specified time
+	enum { port_count = 4 };
+	int  read_port ( time_t, int port );
+	void write_port( time_t, int port, int data );
+
+	// Runs SPC to end_time and starts a new time frame at 0
+	void end_frame( time_t end_time );
+	
+// Sound control
+	
+	// Mutes voices corresponding to non-zero bits in mask (issues repeated KOFF events).
+	// Reduces emulation accuracy.
+	enum { voice_count = 8 };
+	void mute_voices( int mask );
+	
+	// If true, prevents channels and global volumes from being phase-negated.
+	// Only supported by fast DSP.
+	void disable_surround( bool disable = true );
+	
+	// Sets tempo, where tempo_unit = normal, tempo_unit / 2 = half speed, etc.
+	enum { tempo_unit = 0x100 };
+	void set_tempo( int );
+
+// SPC music files
+
+	// Loads SPC data into emulator
+	enum { spc_min_file_size = 0x10180 };
+	enum { spc_file_size     = 0x10200 };
+	blargg_err_t load_spc( void const* in, long size );
+	
+	// Clears echo region. Useful after loading an SPC as many have garbage in echo.
+	void clear_echo();
+
+	// Plays for count samples and write samples to out. Discards samples if out
+	// is NULL. Count must be a multiple of 2 since output is stereo.
+	blargg_err_t play( int count, sample_t* out );
+	
+	// Skips count samples. Several times faster than play() when using fast DSP.
+	blargg_err_t skip( int count );
+	
+// State save/load (only available with accurate DSP)
+
+#if !SPC_NO_COPY_STATE_FUNCS
+	// Saves/loads state
+	enum { state_size = 67 * 1024L }; // maximum space needed when saving
+	typedef Spc_Dsp::copy_func_t copy_func_t;
+	void copy_state( unsigned char** io, copy_func_t );
+	
+	// Writes minimal header to spc_out
+	static void init_header( void* spc_out );
+
+	// Saves emulator state as SPC file data. Writes spc_file_size bytes to spc_out.
+	// Does not set up SPC header; use init_header() for that.
+	void save_spc( void* spc_out );
+
+	// Returns true if new key-on events occurred since last check. Useful for
+	// trimming silence while saving an SPC.
+	bool check_kon();
+#endif
+
+public:
+	// Direct access to the emulated CPU registers, the emulated RAM and the run loop
+	struct regs_t
+	{
+		uint16_t pc;
+		uint8_t  a;
+		uint8_t  x;
+		uint8_t  y;
+		uint8_t  psw;
+		uint8_t  sp;
+	};
+	regs_t& smp_regs() { return m.cpu_regs; }
+	
+	uint8_t* smp_ram() { return m.ram.ram; }
+	
+	void run_until( time_t t ) { run_until_( t ); }
+public:
+	BLARGG_DISABLE_NOTHROW
+	
+	// Time relative to m.spc_time. Speeds up code a bit by eliminating need to
+	// constantly add m.spc_time to time from CPU. CPU uses time that ends at
+	// 0 to eliminate reloading end time every instruction. It pays off.
+	typedef int rel_time_t;
+	
+	struct Timer
+	{
+		rel_time_t next_time; // time of next event
+		int prescaler;
+		int period;
+		int divider;
+		int enabled;
+		int counter;
+	};
+	enum { reg_count = 0x10 };
+	enum { timer_count = 3 };
+	enum { extra_size = Spc_Dsp::extra_size };
+	
+	enum { signature_size = 35 };
+	
+private:
+	Spc_Dsp dsp;
+	
+	#if SPC_LESS_ACCURATE
+		static signed char const reg_times_ [256];
+		signed char reg_times [256];
+	#endif
+	
+	struct state_t
+	{
+		Timer timers [timer_count];
+		
+		uint8_t smp_regs [2] [reg_count];
+		
+		regs_t cpu_regs;
+		
+		rel_time_t  dsp_time;
+		time_t      spc_time;
+		bool        echo_accessed;
+		
+		int         tempo;
+		int         skipped_kon;
+		int         skipped_koff;
+		const char* cpu_error;
+		
+		int         extra_clocks;
+		sample_t*   buf_begin;
+		sample_t const* buf_end;
+		sample_t*   extra_pos;
+		sample_t    extra_buf [extra_size];
+		
+		int         rom_enabled;
+		uint8_t     rom    [rom_size];
+		uint8_t     hi_ram [rom_size];
+		
+		unsigned char cycle_table [256];
+		
+		struct
+		{
+			// padding to neutralize address overflow -- but this is
+			// still undefined behavior! TODO: remove and instead properly
+			// guard usage of emulated memory
+			uint8_t padding1 [0x100];
+			alignas(uint16_t) uint8_t ram      [0x10000 + 0x100];
+		} ram;
+	};
+	state_t m;
+	
+	enum { rom_addr = 0xFFC0 };
+	
+	enum { skipping_time = 127 };
+	
+	// Value that padding should be filled with
+	enum { cpu_pad_fill = 0xFF };
+	
+	enum {
+        r_test     = 0x0, r_control  = 0x1,
+        r_dspaddr  = 0x2, r_dspdata  = 0x3,
+        r_cpuio0   = 0x4, r_cpuio1   = 0x5,
+        r_cpuio2   = 0x6, r_cpuio3   = 0x7,
+        r_f8       = 0x8, r_f9       = 0x9,
+        r_t0target = 0xA, r_t1target = 0xB, r_t2target = 0xC,
+        r_t0out    = 0xD, r_t1out    = 0xE, r_t2out    = 0xF
+	};
+	
+	void timers_loaded();
+	void enable_rom( int enable );
+	void reset_buf();
+	void save_extra();
+	void load_regs( uint8_t const in [reg_count] );
+	void ram_loaded();
+	void regs_loaded();
+	void reset_time_regs();
+	void reset_common( int timer_counter_init );
+	
+	Timer* run_timer_      ( Timer* t, rel_time_t );
+	Timer* run_timer       ( Timer* t, rel_time_t );
+	int dsp_read           ( rel_time_t );
+	void dsp_write         ( int data, rel_time_t );
+	void cpu_write_smp_reg_( int data, rel_time_t, uint16_t addr );
+	void cpu_write_smp_reg ( int data, rel_time_t, uint16_t addr );
+	void cpu_write_high    ( int data, uint8_t i );
+	void cpu_write         ( int data, uint16_t addr, rel_time_t );
+	int cpu_read_smp_reg   ( int i, rel_time_t );
+	int cpu_read           ( uint16_t addr, rel_time_t );
+	unsigned CPU_mem_bit   ( uint16_t pc, rel_time_t );
+	
+	bool check_echo_access ( int addr );
+	uint8_t* run_until_( time_t end_time );
+	
+	struct spc_file_t
+	{
+		char    signature [signature_size];
+		uint8_t has_id666;
+		uint8_t version;
+		uint8_t pcl, pch;
+		uint8_t a;
+		uint8_t x;
+		uint8_t y;
+		uint8_t psw;
+		uint8_t sp;
+		char    text [212];
+		uint8_t ram [0x10000];
+		uint8_t dsp [128];
+		uint8_t unused [0x40];
+		uint8_t ipl_rom [0x40];
+	};
+
+	static char const signature [signature_size + 1];
+	
+	void save_regs( uint8_t out [reg_count] );
+};
+
+#include <assert.h>
+
+inline int Snes_Spc::sample_count() const { return (m.extra_clocks >> 5) * 2; } // two samples per 32 clocks
+// Port accessors index the pointer returned by run_until_(); reads use offsets 0-3
+inline int Snes_Spc::read_port( time_t t, int port )
+{
+	assert( (unsigned) port < port_count );
+	return run_until_( t ) [port];
+}
+// Writes land 0x10 bytes after the corresponding read slot in the same block
+inline void Snes_Spc::write_port( time_t t, int port, int data )
+{
+	assert( (unsigned) port < port_count );
+	run_until_( t ) [0x10 + port] = data;
+}
+
+inline void Snes_Spc::mute_voices( int mask ) { dsp.mute_voices( mask ); }
+// The voice/surround controls and check_kon() simply forward to the DSP object
+inline void Snes_Spc::disable_surround( bool disable ) { dsp.disable_surround( disable ); }
+
+#if !SPC_NO_COPY_STATE_FUNCS
+inline bool Snes_Spc::check_kon() { return dsp.check_kon(); }
+#endif
+
+#endif
diff --git a/libraries/game-music-emu/gme/Spc_Cpu.cpp b/libraries/game-music-emu/gme/Spc_Cpu.cpp
new file mode 100644
index 000000000..998fe121b
--- /dev/null
+++ b/libraries/game-music-emu/gme/Spc_Cpu.cpp
@@ -0,0 +1,549 @@
+// Core SPC emulation: CPU, timers, SMP registers, memory
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Snes_Spc.h"
+
+#include <string.h>
+
+/* Copyright (C) 2004-2007 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+#define RAM         (m.ram.ram)
+#define REGS        (m.smp_regs [0])
+#define REGS_IN     (m.smp_regs [1])
+// Shorthands for the emulated RAM and the two SMP register banks in state m
+// (n ? n : 256) for n in 0-255; any other n is first reduced modulo 256
+#define IF_0_THEN_256( n ) ((uint8_t) ((n) - 1) + 1)
+
+// Note: SPC_MORE_ACCURACY exists mainly so I can run my validation tests, which
+// do crazy echo buffer accesses.
+#ifndef SPC_MORE_ACCURACY
+	#define SPC_MORE_ACCURACY 0
+#endif
+
+#ifdef BLARGG_ENABLE_OPTIMIZER
+	#include BLARGG_ENABLE_OPTIMIZER
+#endif
+
+
+//// Timers
+
+#if SPC_DISABLE_TEMPO // fixed tempo: set_tempo() stores a shift count in prescaler
+	#define TIMER_DIV( t, n ) ((n) >> (t)->prescaler)
+	#define TIMER_MUL( t, n ) ((n) << (t)->prescaler)
+#else // adjustable tempo: set_tempo() stores a clock divisor in prescaler
+	#define TIMER_DIV( t, n ) ((n) / (t)->prescaler)
+	#define TIMER_MUL( t, n ) ((n) * (t)->prescaler)
+#endif
+
+Snes_Spc::Timer* Snes_Spc::run_timer_( Timer* t, rel_time_t time )
+{
+	int elapsed = TIMER_DIV( t, time - t->next_time ) + 1; // prescaler ticks to process
+	t->next_time += TIMER_MUL( t, elapsed );
+	// A disabled timer only has its schedule advanced
+	if ( t->enabled )
+	{
+		int remain = IF_0_THEN_256( t->period - t->divider ); // ticks until the divider reaches period
+		int divider = t->divider + elapsed;
+		int over = elapsed - remain;
+		if ( over >= 0 )
+		{
+			int n = over / t->period; // additional full periods completed
+			t->counter = (t->counter + 1 + n) & 0x0F; // counter is kept to 4 bits
+			divider = over - n * t->period;
+		}
+		t->divider = (uint8_t) divider;
+	}
+	return t;
+}
+
+inline Snes_Spc::Timer* Snes_Spc::run_timer( Timer* t, rel_time_t time )
+{
+	// Fast path: only call the out-of-line updater once the timer's next
+	// event time has been reached
+	return (time < t->next_time) ? t : run_timer_( t, time );
+}
+
+
+//// ROM
+
+void Snes_Spc::enable_rom( int enable )
+{
+	if ( m.rom_enabled == enable )
+		return; // mapping unchanged
+	m.rom_enabled = enable;
+	uint8_t* const window = RAM + rom_addr;
+	if ( enable ) // save the RAM contents that the ROM image is about to replace
+		memcpy( m.hi_ram, window, rom_size );
+	memcpy( window, enable ? m.rom : m.hi_ram, rom_size );
+	// TODO: ROM can still get overwritten when DSP writes to echo buffer
+}
+
+
+//// DSP
+
+#if SPC_LESS_ACCURATE
+	int const max_reg_time = 29;
+	
+	signed char const Snes_Spc::reg_times_ [256] =
+	{
+		 -1,  0,-11,-10,-15,-11, -2, -2,  4,  3, 14, 14, 26, 26, 14, 22,
+		  2,  3,  0,  1,-12,  0,  1,  1,  7,  6, 14, 14, 27, 14, 14, 23,
+		  5,  6,  3,  4, -1,  3,  4,  4, 10,  9, 14, 14, 26, -5, 14, 23,
+		  8,  9,  6,  7,  2,  6,  7,  7, 13, 12, 14, 14, 27, -4, 14, 24,
+		 11, 12,  9, 10,  5,  9, 10, 10, 16, 15, 14, 14, -2, -4, 14, 24,
+		 14, 15, 12, 13,  8, 12, 13, 13, 19, 18, 14, 14, -2,-36, 14, 24,
+		 17, 18, 15, 16, 11, 15, 16, 16, 22, 21, 14, 14, 28, -3, 14, 25,
+		 20, 21, 18, 19, 14, 18, 19, 19, 25, 24, 14, 14, 14, 29, 14, 25,
+		 
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+	};
+	
+	#define RUN_DSP( time, offset ) \
+		int count = (time) - (offset) - m.dsp_time;\
+		if ( count >= 0 )\
+		{\
+			int clock_count = (count & ~(clocks_per_sample - 1)) + clocks_per_sample;\
+			m.dsp_time += clock_count;\
+			dsp.run( clock_count );\
+		}
+#else
+	#define RUN_DSP( time, offset ) \
+		{\
+			int count = (time) - m.dsp_time;\
+			if ( !SPC_MORE_ACCURACY || count )\
+			{\
+				assert( count > 0 );\
+				m.dsp_time = (time);\
+				dsp.run( count );\
+			}\
+		}
+#endif
+
+int Snes_Spc::dsp_read( rel_time_t time )
+{
+	// Runs the DSP up to the given relative time, then returns the addressed DSP register
+	RUN_DSP( time, reg_times [REGS [r_dspaddr] & 0x7F] );
+	// Only the low 7 bits of the DSP address register select a register on reads
+	int result = dsp.read( REGS [r_dspaddr] & 0x7F );
+	
+	#ifdef SPC_DSP_READ_HOOK
+		SPC_DSP_READ_HOOK( m.spc_time + time, (REGS [r_dspaddr] & 0x7F), result );
+	#endif
+	
+	return result;
+}
+
+inline void Snes_Spc::dsp_write( int data, rel_time_t time )
+{
+	RUN_DSP( time, reg_times [REGS [r_dspaddr]] )
+	#if SPC_LESS_ACCURATE // the else below pairs with the if that ends this build's RUN_DSP
+		else if ( m.dsp_time == skipping_time )
+		{
+			int r = REGS [r_dspaddr];
+			if ( r == Spc_Dsp::r_kon )
+				m.skipped_kon |= data & ~dsp.read( Spc_Dsp::r_koff );
+			// A key-off also cancels any key-on collected earlier for those voices
+			if ( r == Spc_Dsp::r_koff )
+			{
+				m.skipped_koff |= data;
+				m.skipped_kon &= ~data;
+			}
+		}
+	#endif
+	
+	#ifdef SPC_DSP_WRITE_HOOK
+		SPC_DSP_WRITE_HOOK( m.spc_time + time, REGS [r_dspaddr], (uint8_t) data );
+	#endif
+	// Only addresses $00-$7F are forwarded to the DSP; others are dropped (and logged)
+	if ( REGS [r_dspaddr] <= 0x7F )
+		dsp.write( REGS [r_dspaddr], data );
+	else if ( !SPC_MORE_ACCURACY )
+		debug_printf( "SPC wrote to DSP register > $7F\n" );
+}
+
+
+//// Memory access extras
+
+#if SPC_MORE_ACCURACY
+	#define MEM_ACCESS( time, addr ) \
+	{\
+		if ( time >= m.dsp_time )\
+		{\
+			RUN_DSP( time, max_reg_time );\
+		}\
+	}
+#elif !defined (NDEBUG)
+	// Debug-only check for read/write within echo buffer, since this might result in
+	// inaccurate emulation due to the DSP not being caught up to the present.
+	
+	bool Snes_Spc::check_echo_access( int addr )
+	{
+		// Returns true only for the first access that lands inside the echo
+		// region while FLG bit 5 is clear; later hits are not reported again.
+		if ( dsp.read( Spc_Dsp::r_flg ) & 0x20 )
+			return false;
+		
+		int const first = 0x100 * dsp.read( Spc_Dsp::r_esa );
+		int const bytes = 0x800 * (dsp.read( Spc_Dsp::r_edl ) & 0x0F);
+		int const limit = first + (bytes ? bytes : 4); // a zero size still spans 4 bytes
+		bool const inside = first <= addr && addr < limit;
+		if ( !inside || m.echo_accessed )
+			return false;
+		
+		// Remember that the access has been reported
+		m.echo_accessed = 1;
+		return true;
+	}
+	
+	#define MEM_ACCESS( time, addr ) check( !check_echo_access( (uint16_t) addr ) );
+#else
+	#define MEM_ACCESS( time, addr )
+#endif
+
+
+//// CPU write
+
+#if SPC_MORE_ACCURACY
+static unsigned char const glitch_probs [3] [256] =
+{
+	0xC3,0x92,0x5B,0x1C,0xD1,0x92,0x5B,0x1C,0xDB,0x9C,0x72,0x18,0xCD,0x5C,0x38,0x0B,
+	0xE1,0x9C,0x74,0x17,0xCF,0x75,0x45,0x0C,0xCF,0x6E,0x4A,0x0D,0xA3,0x3A,0x1D,0x08,
+	0xDB,0xA0,0x82,0x19,0xD9,0x73,0x3C,0x0E,0xCB,0x76,0x52,0x0B,0xA5,0x46,0x1D,0x09,
+	0xDA,0x74,0x55,0x0F,0xA2,0x3F,0x21,0x05,0x9A,0x40,0x20,0x07,0x63,0x1E,0x10,0x01,
+	0xDF,0xA9,0x85,0x1D,0xD3,0x84,0x4B,0x0E,0xCF,0x6F,0x49,0x0F,0xB3,0x48,0x1E,0x05,
+	0xD8,0x77,0x52,0x12,0xB7,0x49,0x23,0x06,0xAA,0x45,0x28,0x07,0x7D,0x28,0x0F,0x07,
+	0xCC,0x7B,0x4A,0x0E,0xB2,0x4F,0x24,0x07,0xAD,0x43,0x2C,0x06,0x86,0x29,0x11,0x07,
+	0xAE,0x48,0x1F,0x0A,0x76,0x21,0x19,0x05,0x76,0x21,0x14,0x05,0x44,0x11,0x0B,0x01,
+	0xE7,0xAD,0x96,0x23,0xDC,0x86,0x59,0x0E,0xDC,0x7C,0x5F,0x15,0xBB,0x53,0x2E,0x09,
+	0xD6,0x7C,0x4A,0x16,0xBB,0x4A,0x25,0x08,0xB3,0x4F,0x28,0x0B,0x8E,0x23,0x15,0x08,
+	0xCF,0x7F,0x57,0x11,0xB5,0x4A,0x23,0x0A,0xAA,0x42,0x28,0x05,0x7D,0x22,0x12,0x03,
+	0xA6,0x49,0x28,0x09,0x82,0x2B,0x0D,0x04,0x7A,0x20,0x0F,0x04,0x3D,0x0F,0x09,0x03,
+	0xD1,0x7C,0x4C,0x0F,0xAF,0x4E,0x21,0x09,0xA8,0x46,0x2A,0x07,0x85,0x1F,0x0E,0x07,
+	0xA6,0x3F,0x26,0x07,0x7C,0x24,0x14,0x07,0x78,0x22,0x16,0x04,0x46,0x12,0x0A,0x02,
+	0xA6,0x41,0x2C,0x0A,0x7E,0x28,0x11,0x05,0x73,0x1B,0x14,0x05,0x3D,0x11,0x0A,0x02,
+	0x70,0x22,0x17,0x05,0x48,0x13,0x08,0x03,0x3C,0x07,0x0D,0x07,0x26,0x07,0x06,0x01,
+	
+	0xE0,0x9F,0xDA,0x7C,0x4F,0x18,0x28,0x0D,0xE9,0x9F,0xDA,0x7C,0x4F,0x18,0x1F,0x07,
+	0xE6,0x97,0xD8,0x72,0x64,0x13,0x26,0x09,0xDC,0x67,0xA9,0x38,0x21,0x07,0x15,0x06,
+	0xE9,0x91,0xD2,0x6B,0x63,0x14,0x2B,0x0E,0xD6,0x61,0xB7,0x41,0x2B,0x0E,0x10,0x09,
+	0xCF,0x59,0xB0,0x2F,0x35,0x08,0x0F,0x07,0xB6,0x30,0x7A,0x21,0x17,0x07,0x09,0x03,
+	0xE7,0xA3,0xE5,0x6B,0x65,0x1F,0x34,0x09,0xD8,0x6B,0xBE,0x45,0x27,0x07,0x10,0x07,
+	0xDA,0x54,0xB1,0x39,0x2E,0x0E,0x17,0x08,0xA9,0x3C,0x86,0x22,0x16,0x06,0x07,0x03,
+	0xD4,0x51,0xBC,0x3D,0x38,0x0A,0x13,0x06,0xB2,0x37,0x79,0x1C,0x17,0x05,0x0E,0x06,
+	0xA7,0x31,0x74,0x1C,0x11,0x06,0x0C,0x02,0x6D,0x1A,0x38,0x10,0x0B,0x05,0x06,0x03,
+	0xEB,0x9A,0xE1,0x7A,0x6F,0x13,0x34,0x0E,0xE6,0x75,0xC5,0x45,0x3E,0x0B,0x1A,0x05,
+	0xD8,0x63,0xC1,0x40,0x3C,0x1B,0x19,0x06,0xB3,0x42,0x83,0x29,0x18,0x0A,0x08,0x04,
+	0xD4,0x58,0xBA,0x43,0x3F,0x0A,0x1F,0x09,0xB1,0x33,0x8A,0x1F,0x1F,0x06,0x0D,0x05,
+	0xAF,0x3C,0x7A,0x1F,0x16,0x08,0x0A,0x01,0x72,0x1B,0x52,0x0D,0x0B,0x09,0x06,0x01,
+	0xCF,0x63,0xB7,0x47,0x40,0x10,0x14,0x06,0xC0,0x41,0x96,0x20,0x1C,0x09,0x10,0x05,
+	0xA6,0x35,0x82,0x1A,0x20,0x0C,0x0E,0x04,0x80,0x1F,0x53,0x0F,0x0B,0x02,0x06,0x01,
+	0xA6,0x31,0x81,0x1B,0x1D,0x01,0x08,0x08,0x7B,0x20,0x4D,0x19,0x0E,0x05,0x07,0x03,
+	0x6B,0x17,0x49,0x07,0x0E,0x03,0x0A,0x05,0x37,0x0B,0x1F,0x06,0x04,0x02,0x07,0x01,
+	
+	0xF0,0xD6,0xED,0xAD,0xEC,0xB1,0xEB,0x79,0xAC,0x22,0x47,0x1E,0x6E,0x1B,0x32,0x0A,
+	0xF0,0xD6,0xEA,0xA4,0xED,0xC4,0xDE,0x82,0x98,0x1F,0x50,0x13,0x52,0x15,0x2A,0x0A,
+	0xF1,0xD1,0xEB,0xA2,0xEB,0xB7,0xD8,0x69,0xA2,0x1F,0x5B,0x18,0x55,0x18,0x2C,0x0A,
+	0xED,0xB5,0xDE,0x7E,0xE6,0x85,0xD3,0x59,0x59,0x0F,0x2C,0x09,0x24,0x07,0x15,0x09,
+	0xF1,0xD6,0xEA,0xA0,0xEC,0xBB,0xDA,0x77,0xA9,0x23,0x58,0x14,0x5D,0x12,0x2F,0x09,
+	0xF1,0xC1,0xE3,0x86,0xE4,0x87,0xD2,0x4E,0x68,0x15,0x26,0x0B,0x27,0x09,0x15,0x02,
+	0xEE,0xA6,0xE0,0x5C,0xE0,0x77,0xC3,0x41,0x67,0x1B,0x3C,0x07,0x2A,0x06,0x19,0x07,
+	0xE4,0x75,0xC6,0x43,0xCC,0x50,0x95,0x23,0x35,0x09,0x14,0x04,0x15,0x05,0x0B,0x04,
+	0xEE,0xD6,0xED,0xAD,0xEC,0xB1,0xEB,0x79,0xAC,0x22,0x56,0x14,0x5A,0x12,0x26,0x0A,
+	0xEE,0xBB,0xE7,0x7E,0xE9,0x8D,0xCB,0x49,0x67,0x11,0x34,0x07,0x2B,0x0B,0x14,0x07,
+	0xED,0xA7,0xE5,0x76,0xE3,0x7E,0xC4,0x4B,0x77,0x14,0x34,0x08,0x27,0x07,0x14,0x04,
+	0xE7,0x8B,0xD2,0x4C,0xCA,0x56,0x9E,0x31,0x36,0x0C,0x11,0x07,0x14,0x04,0x0A,0x02,
+	0xF0,0x9B,0xEA,0x6F,0xE5,0x81,0xC4,0x43,0x74,0x10,0x30,0x0B,0x2D,0x08,0x1B,0x06,
+	0xE6,0x83,0xCA,0x48,0xD9,0x56,0xA7,0x23,0x3B,0x09,0x12,0x09,0x15,0x07,0x0A,0x03,
+	0xE5,0x5F,0xCB,0x3C,0xCF,0x48,0x91,0x22,0x31,0x0A,0x17,0x08,0x15,0x04,0x0D,0x02,
+	0xD1,0x43,0x91,0x20,0xA9,0x2D,0x54,0x12,0x17,0x07,0x09,0x02,0x0C,0x04,0x05,0x03,
+};
+#endif
+
+// Read/write handlers are divided into multiple functions to keep rarely-used
+// functionality separate, so that the often-used paths can be optimized better
+// by the compiler.
+
+// If write isn't preceded by read, data has this added to it (see MOV dp,dp, MOV (X)+,A, MOVW dp,YA, DBNZ dp, MOV1 mem.bit,C)
+int const no_read_before_write = 0x2000; // tested (as data < no_read_before_write / 2) by the timer-output case of cpu_write_smp_reg_()
+
+void Snes_Spc::cpu_write_smp_reg_( int data, rel_time_t time, uint16_t addr ) // addr is a register index: callers subtract 0xF0 before calling
+{
+	switch ( addr ) // register indices without a case below are ignored here
+	{
+	case r_t0target:
+	case r_t1target:
+	case r_t2target: {
+		Timer* t = &m.timers [addr - r_t0target];
+		int period = IF_0_THEN_256( data ); // presumably maps 0 to 256; the macro is defined outside this view
+		if ( t->period != period )
+		{
+			t = run_timer( t, time ); // run the timer up to `time` while the old period is still in effect
+			#if SPC_MORE_ACCURACY
+				// Insane behavior when target is written just after counter is
+				// clocked and counter matches new period and new period isn't 1, 2, 4, or 8
+				if ( t->divider == (period & 0xFF) &&
+						t->next_time == time + TIMER_MUL( t, 1 ) &&
+						((period - 1) | ~0x0F) & period )
+				{
+					//debug_printf( "SPC pathological timer target write\n" );
+					
+					// If the period is 3, 5, or 9, there's a probability this behavior won't occur,
+					// based on the previous period
+					int prob = 0xFF;
+					int old_period = t->period & 0xFF;
+					if ( period == 3 ) prob = glitch_probs [0] [old_period];
+					if ( period == 5 ) prob = glitch_probs [1] [old_period];
+					if ( period == 9 ) prob = glitch_probs [2] [old_period];
+					
+					// The glitch suppresses incrementing of one of the counter bits, based on
+					// the lowest set bit in the new period
+					int b = 1;
+					while ( !(period & b) ) // terminates because the condition above requires a non-zero period
+						b <<= 1;
+					
+					if ( (rand() >> 4 & 0xFF) <= prob ) // prob == 0xFF (the default) always passes
+						t->divider = (t->divider - b) & 0xFF;
+				}
+			#endif
+			t->period = period;
+		}
+		break;
+	}
+	
+	case r_t0out:
+	case r_t1out:
+	case r_t2out:
+		if ( !SPC_MORE_ACCURACY )
+			debug_printf( "SPC wrote to counter %d\n", (int) addr - r_t0out );
+		
+		if ( data < no_read_before_write  / 2 ) // false when the opcode added no_read_before_write to data
+			run_timer( &m.timers [addr - r_t0out], time - 1 )->counter = 0; // timer is run to time - 1, then its counter is cleared
+		break;
+	
+	// Registers that act like RAM
+	case 0x8:
+	case 0x9:
+		REGS_IN [addr] = (uint8_t) data; // make the written value readable back through REGS_IN
+		break;
+	
+	case r_test:
+		if ( (uint8_t) data != 0x0A ) // only values other than 0x0A are reported; nothing else is emulated
+			debug_printf( "SPC wrote to test register\n" );
+		break;
+	
+	case r_control:
+		// port clears
+		if ( data & 0x10 ) // bit 4 zeroes the input side of ports 0 and 1
+		{
+			REGS_IN [r_cpuio0] = 0;
+			REGS_IN [r_cpuio1] = 0;
+		}
+		if ( data & 0x20 ) // bit 5 zeroes the input side of ports 2 and 3
+		{
+			REGS_IN [r_cpuio2] = 0;
+			REGS_IN [r_cpuio3] = 0;
+		}
+		
+		// timers
+		{
+			for ( int i = 0; i < timer_count; i++ )
+			{
+				Timer* t = &m.timers [i];
+				int enabled = data >> i & 1; // bit i of the control value is timer i's enable
+				if ( t->enabled != enabled )
+				{
+					t = run_timer( t, time ); // settle the timer under its old enable state first
+					t->enabled = enabled;
+					if ( enabled ) // timer was just switched on: restart divider and counter
+					{
+						t->divider = 0;
+						t->counter = 0;
+					}
+				}
+			}
+		}
+		enable_rom( data & 0x80 ); // bit 7 is handed to enable_rom() (defined outside this view)
+		break;
+	}
+}
+
+void Snes_Spc::cpu_write_smp_reg( int data, rel_time_t time, uint16_t addr )
+{
+	if ( addr != r_dspdata ) // uncommon: every register except the DSP data port
+		cpu_write_smp_reg_( data, time, addr );
+	else // common case (profiling note from the original: 99%)
+		dsp_write( data, time );
+}
+
+void Snes_Spc::cpu_write_high( int data, uint8_t i )
+{
+	assert ( i < rom_size ); // i is an offset from rom_addr
+	uint8_t const value = (uint8_t) data;
+	m.hi_ram [i] = value; // always remember the byte that was written
+	if ( m.rom_enabled ) RAM [rom_addr + i] = m.rom [i]; // ROM is visible: put back the ROM byte the caller overwrote in RAM
+}
+
+void Snes_Spc::cpu_write( int data, uint16_t addr, rel_time_t time )
+{
+	MEM_ACCESS( time, addr )
+	
+	// Every write lands in RAM first; the special regions are fixed up below
+	uint8_t const byte = (uint8_t) data;
+	RAM [addr] = byte;
+	if ( addr < 0xF0 )
+		return; // plain RAM below the registers (original profiling note: 64% take the other path)
+	
+	uint16_t const reg = addr - 0xF0;
+	if ( reg >= reg_count )
+	{
+		// Past the registers, only the IPL ROM area needs extra handling
+		if ( addr >= rom_addr )
+			cpu_write_high( data, addr - rom_addr );
+		return;
+	}
+	
+	// $F0-$FF
+	REGS [reg] = byte;
+	#ifdef SPC_PORT_WRITE_HOOK
+		if ( (unsigned) (reg - r_cpuio0) < port_count )
+			SPC_PORT_WRITE_HOOK( m.spc_time + time, (reg - r_cpuio0),
+					(uint8_t) data, &REGS [r_cpuio0] );
+	#endif
+	
+	bool const needs_no_handler = (reg == 2) || (reg >= 4 && reg <= 7); // $F2 and $F4-$F7
+	if ( !needs_no_handler ) cpu_write_smp_reg( data, time, reg );
+}
+
+
+//// CPU read
+
+inline int Snes_Spc::cpu_read_smp_reg( int reg, rel_time_t time )
+{
+	// Only the DSP address/data pair is special; every other register reads back from REGS_IN
+	switch ( reg - r_dspaddr )
+	{
+	case 0: // 0xF2: DSP address
+		return REGS [r_dspaddr];
+	case 1: // 0xF3: DSP data
+		return dsp_read( time );
+	default:
+		return REGS_IN [reg];
+	}
+}
+
+int Snes_Spc::cpu_read( uint16_t addr, rel_time_t time ) // returns the byte read; reading a timer output also clears its counter
+{
+	MEM_ACCESS( time, addr )
+	
+	// RAM
+	int result = RAM [addr]; // default answer; replaced below for timers and registers
+	int reg = addr - 0xF0;
+	if ( reg >= 0 ) // 40%
+	{
+		reg -= 0x10; // reg == addr - 0x100, which is negative exactly for $F0-$FF
+		if ( (unsigned) reg >= 0xFF00 ) // 21%
+		{
+			reg += 0x10 - r_t0out; // reg == addr - 0xF0 - r_t0out: index relative to the first timer output
+			
+			// Timers
+			if ( (unsigned) reg < timer_count ) // 90%
+			{
+				Timer* t = &m.timers [reg];
+				if ( time >= t->next_time ) // only run the timer when it is due
+					t = run_timer_( t, time );
+				result = t->counter;
+				t->counter = 0; // the read clears the counter
+			}
+			// Other registers
+			else if ( reg < 0 ) // 10%
+			{
+				result = cpu_read_smp_reg( reg + r_t0out, time ); // reg + r_t0out restores the plain register index
+			}
+			else // 1%
+			{
+				// NOTE(review): handles addr >= 0x10000; presumably unreachable with a uint16_t addr - confirm r_t0out + timer_count == 0x10
+				assert( reg + (r_t0out + 0xF0 - 0x10000) < 0x100 );
+				result = cpu_read( reg + (r_t0out + 0xF0 - 0x10000), time );
+			}
+		}
+	}
+	
+	return result;
+}
+
+
+//// Run
+
+// Prefix and suffix for CPU emulator function; the body between them is in Spc_Cpu.h, included at the end of this file
+#define SPC_CPU_RUN_FUNC \
+uint8_t* Snes_Spc::run_until_( time_t end_time )\
+{\
+	rel_time_t rel_time = m.spc_time - end_time; /* time relative to end_time; must not be positive */\
+	assert( rel_time <= 0 );\
+	m.spc_time = end_time;\
+	m.dsp_time += rel_time; /* same shift as spc_time, so m.spc_time + m.dsp_time is unchanged */\
+	m.timers [0].next_time += rel_time;\
+	m.timers [1].next_time += rel_time;\
+	m.timers [2].next_time += rel_time;
+
+#define SPC_CPU_RUN_FUNC_END /* applies the inverse shift using the final rel_time, then returns the port registers */ \
+	m.spc_time += rel_time;\
+	m.dsp_time -= rel_time;\
+	m.timers [0].next_time -= rel_time;\
+	m.timers [1].next_time -= rel_time;\
+	m.timers [2].next_time -= rel_time;\
+	assert( m.spc_time <= end_time );\
+	return &REGS [r_cpuio0];\
+}
+
+int const cpu_lag_max = 12 - 1; // DIV YA,X takes 12 clocks
+
+void Snes_Spc::end_frame( time_t end_time )
+{
+	// Run the CPU as far toward end_time as whole instructions allow. An
+	// instruction that would overshoot is NOT executed, leaving m.spc_time < end_time.
+	if ( m.spc_time < end_time )
+		run_until_( end_time );
+	
+	m.extra_clocks += end_time;
+	m.spc_time     -= end_time;
+	
+	// The CPU can stop short of the end by at most cpu_lag_max clocks: that is
+	// when the next instruction would not fit in the time that was left
+	assert( m.spc_time <= 0 && m.spc_time >= -cpu_lag_max );
+	
+	// Bring each timer up to the CPU
+	int index = 0;
+	while ( index < timer_count )
+		run_timer( &m.timers [index++], 0 );
+	
+	// Bring the DSP up to the CPU
+	if ( m.dsp_time < 0 )
+	{
+		RUN_DSP( 0, max_reg_time );
+	}
+	
+	// Stash any extra samples beyond what should be generated
+	if ( m.buf_begin )
+		save_extra();
+}
+
+// Inclusion here allows static memory access functions and better optimization
+#include "Spc_Cpu.h"
diff --git a/libraries/game-music-emu/gme/Spc_Cpu.h b/libraries/game-music-emu/gme/Spc_Cpu.h
new file mode 100644
index 000000000..2dd3e63c2
--- /dev/null
+++ b/libraries/game-music-emu/gme/Spc_Cpu.h
@@ -0,0 +1,1182 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+/* Copyright (C) 2004-2007 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+//// Memory access
+
+#if SPC_MORE_ACCURACY
+	#define SUSPICIOUS_OPCODE( name ) ((void) 0) // accuracy build: expands to a no-op
+#else
+	#define SUSPICIOUS_OPCODE( name ) debug_printf( "SPC: suspicious opcode: " name "\n" ) // name must be a string literal: it is joined by literal concatenation
+#endif
+
+#define CPU_READ( time, offset, addr )\
+	cpu_read( addr, time + offset ) // note the argument order: cpu_read takes (addr, time)
+
+#define CPU_WRITE( time, offset, addr, data )\
+	cpu_write( data, addr, time + offset ) // note the argument order: cpu_write takes (data, addr, time)
+
+#if SPC_MORE_ACCURACY
+	#define CPU_READ_TIMER( time, offset, addr, out )\
+		{ out = CPU_READ( time, offset, addr ); } // accuracy build: no shortcut, always go through cpu_read()
+
+#else
+	// timers are by far the most common thing read from dp, so they are tested first and handled inline
+	#define CPU_READ_TIMER( time, offset, addr_, out )\
+	{\
+		rel_time_t adj_time = time + offset;\
+		int dp_addr = addr_;\
+		int ti = dp_addr - (r_t0out + 0xF0); /* timer index if dp_addr is a timer output register */\
+		if ( (unsigned) ti < timer_count )\
+		{\
+			Timer* t = &m.timers [ti];\
+			if ( adj_time >= t->next_time )\
+				t = run_timer_( t, adj_time );\
+			out = t->counter;\
+			t->counter = 0; /* reading a timer output clears it */\
+		}\
+		else\
+		{\
+			out = ram [dp_addr]; /* needs the local `ram` of the emulator loop */\
+			int i = dp_addr - 0xF0;\
+			if ( (unsigned) i < 0x10 ) /* $F0-$FF other than the timer outputs */\
+				out = cpu_read_smp_reg( i, adj_time );\
+		}\
+	}
+#endif
+
+#define TIME_ADJ( n )   (n) // identity: the offset is used as given
+
+#define READ_TIMER( time, addr, out )       CPU_READ_TIMER( rel_time, TIME_ADJ(time), (addr), out )
+#define READ(  time, addr )                 CPU_READ ( rel_time, TIME_ADJ(time), (addr) ) // time is an offset added to the local rel_time
+#define WRITE( time, addr, data )           CPU_WRITE( rel_time, TIME_ADJ(time), (addr), (data) ) // time is an offset added to the local rel_time
+
+#define DP_ADDR( addr )                     (dp + (addr)) // dp is the direct-page base held in the emulator loop (0 or 0x100)
+
+#define READ_DP_TIMER(  time, addr, out )   CPU_READ_TIMER( rel_time, TIME_ADJ(time), DP_ADDR( addr ), out )
+#define READ_DP(  time, addr )              READ ( time, DP_ADDR( addr ) )
+#define WRITE_DP( time, addr, data )        WRITE( time, DP_ADDR( addr ), data )
+
+#define READ_PROG16( addr )                 (RAM [(addr) & 0xffff] | (RAM [((addr) + 1) & 0xffff] << 8)) // little-endian 16-bit fetch straight from RAM, wrapping at 0xffff; bypasses cpu_read()
+
+#define SET_PC( n )     (pc = n)
+#define GET_PC()        (pc)
+#define READ_PC( pc )   (ram [pc]) // direct RAM fetch, no cpu_read() side effects
+#define READ_PC16( pc ) READ_PROG16( pc )
+
+#define SET_SP( v )     (sp = v)
+#define GET_SP()        ((uint8_t) (sp))
+
+#define PUSH16( data ) /* pushes the high byte, then the low byte; data is evaluated twice */\
+{\
+	PUSH( ((data) & 0xff00) >> 8 ); /* argument parenthesized so any expression is masked as a whole */\
+	PUSH( (data) & 0xff );\
+}
+
+#define PUSH( data ) /* stores one byte at 0x100 + sp, then decrements sp */\
+{\
+	ram [0x100 + sp] = (uint8_t) (data);\
+	--sp;\
+}
+
+#define POP( out ) /* increments sp, then loads one byte from 0x100 + sp into out */\
+{\
+	++sp;\
+	out = ram [0x100 + sp];\
+}
+
+#define MEM_BIT( rel ) CPU_mem_bit( pc, rel_time + rel )
+
+unsigned Snes_Spc::CPU_mem_bit( uint16_t pc, rel_time_t rel_time )
+{
+	unsigned const operand = READ_PC16( pc ); // low 13 bits: byte address, top 3 bits: bit number
+	unsigned const shifted = READ( 0, operand & 0x1FFF ) >> (operand >> 13); // selected bit is now bit 0
+	return (shifted & 1) << 8; // result is 0 or 0x100, the same position callers use for carry in `c`
+}
+
+//// Status flag handling
+
+// Hex value in name to clarify code and bit shifting.
+// Flag stored in indicated variable during emulation
+int const n80 = 0x80; // nz
+int const v40 = 0x40; // psw
+int const p20 = 0x20; // dp
+int const b10 = 0x10; // psw
+int const h08 = 0x08; // psw
+int const i04 = 0x04; // psw
+int const z02 = 0x02; // nz
+int const c01 = 0x01; // c
+
+int const nz_neg_mask = 0x880; // either bit set indicates N flag set
+
+#define GET_PSW( out ) /* packs psw, c, dp and nz into one status byte; out must be a plain lvalue */\
+{\
+	out = psw & ~(n80 | p20 | z02 | c01);\
+	out |= c  >> 8 & c01;\
+	out |= dp >> 3 & p20;\
+	out |= ((nz >> 4) | nz) & n80;\
+	if ( !(uint8_t) nz ) out |= z02;\
+}
+
+#define SET_PSW( in ) /* unpacks a status byte; in is parenthesized so any expression works, and is evaluated four times */\
+{\
+	psw = (in);\
+	c   = (in) << 8;\
+	dp  = ((in) << 3) & 0x100;\
+	nz  = (((in) << 4) & 0x800) | (~(in) & z02);\
+}
+
+SPC_CPU_RUN_FUNC // opens Snes_Spc::run_until_( end_time ) and declares rel_time (macro defined in Snes_Spc.cpp)
+{
+	uint8_t* const ram = RAM;
+	uint8_t a = m.cpu_regs.a;
+	uint8_t x = m.cpu_regs.x;
+	uint8_t y = m.cpu_regs.y;
+	uint16_t pc;
+	uint8_t sp;
+	int psw; // holds only the flags not cached below (see the flag table: v40, b10, h08, i04)
+	int c; // carry flag is bit 8 (0x100)
+	int nz; // Z flag: low byte is zero; N flag: any bit of nz_neg_mask is set
+	int dp; // direct page base: 0 or 0x100
+	
+	SET_PC( m.cpu_regs.pc );
+	SET_SP( m.cpu_regs.sp );
+	SET_PSW( m.cpu_regs.psw );
+	
+	goto loop;
+	
+	
+	// Main loop
+	
+cbranch_taken_loop:
+	pc += (int8_t) ram [pc]; // add the signed displacement stored at pc
+inc_pc_loop:
+	pc++;
+loop:
+{
+	unsigned opcode;
+	unsigned data;
+	
+	check( (unsigned) a < 0x100 );
+	check( (unsigned) x < 0x100 );
+	check( (unsigned) y < 0x100 );
+	
+	opcode = ram [pc];
+	if ( (rel_time += m.cycle_table [opcode]) > 0 ) // charge the whole opcode up front; stop if it does not fit
+		goto out_of_time;
+	
+	#ifdef SPC_CPU_OPCODE_HOOK
+		SPC_CPU_OPCODE_HOOK( GET_PC(), opcode );
+	#endif
+	/*
+	//SUB_CASE_COUNTER( 1 );
+	#define PROFILE_TIMER_LOOP( op, addr, len )\
+	if ( opcode == op )\
+	{\
+		int cond = (unsigned) ((addr) - 0xFD) < 3 &&\
+				pc [len] == 0xF0 && pc [len+1] == 0xFE - len;\
+		SUB_CASE_COUNTER( op && cond );\
+	}
+	
+	PROFILE_TIMER_LOOP( 0xEC, GET_LE16( pc + 1 ), 3 );
+	PROFILE_TIMER_LOOP( 0xEB, pc [1], 2 );
+	PROFILE_TIMER_LOOP( 0xE4, pc [1], 2 );
+	*/
+	
+	// TODO: if PC is at end of memory, this will get wrong operand (very obscure)
+	pc++;
+	data = ram [pc]; // first operand byte, fetched for every opcode; pc is left pointing at it
+	switch ( opcode )
+	{
+	
+// Common instructions
+
+#define BRANCH( cond )\
+{\
+	pc++;\
+	pc += (int8_t) data;\
+	if ( cond )\
+		goto loop;\
+	pc -= (int8_t) data; /* not taken: undo the displacement */\
+	rel_time -= 2; /* not taken: give back 2 clocks */\
+	goto loop;\
+}
+
+	case 0xF0: // BEQ
+		BRANCH( !(uint8_t) nz ) // 89% taken
+	
+	case 0xD0: // BNE
+		BRANCH( (uint8_t) nz )
+	
+	case 0x3F:{// CALL
+		int old_addr = GET_PC() + 2; // return address: just past the 2-byte operand
+		SET_PC( READ_PC16( pc ) );
+		PUSH16( old_addr );
+		goto loop;
+	}
+	
+	case 0x6F:// RET
+		{
+			uint8_t l, h;
+			POP( l );
+			POP( h );
+			SET_PC( l | (h << 8) );
+		}
+		goto loop;
+	
+	case 0xE4: // MOV a,dp
+		++pc;
+		// 80% from timer
+		READ_DP_TIMER( 0, data, a = nz );
+		goto loop;
+	
+	case 0xFA:{// MOV dp,dp
+		int temp;
+		READ_DP_TIMER( -2, data, temp );
+		data = temp + no_read_before_write ;
+	}
+	// fall through
+	case 0x8F:{// MOV dp,#imm
+		int temp = READ_PC( pc + 1 ); // second operand byte: destination dp offset
+		pc += 2;
+		
+		#if !SPC_MORE_ACCURACY
+		{
+			int i = dp + temp;
+			ram [i] = (uint8_t) data;
+			i -= 0xF0;
+			if ( (unsigned) i < 0x10 ) // 76%
+			{
+				REGS [i] = (uint8_t) data;
+				
+				// Registers other than $F2 and $F4-$F7
+				if ( i != 2 && (i < 4 || i > 7)) // 12%
+					cpu_write_smp_reg( data, rel_time, i );
+			}
+		}
+		#else
+			WRITE_DP( 0, temp, data );
+		#endif
+		goto loop;
+	}
+	
+	case 0xC4: // MOV dp,a
+		++pc;
+		#if !SPC_MORE_ACCURACY
+		{
+			int i = dp + data;
+			ram [i] = (uint8_t) a;
+			i -= 0xF0;
+			if ( (unsigned) i < 0x10 ) // 39%
+			{
+				unsigned sel = i - 2; // unsigned on purpose: i < 2 wraps to a large value and takes the sel > 1 path
+				REGS [i] = (uint8_t) a;
+				
+				if ( sel == 1 ) // 51% $F3
+					dsp_write( a, rel_time );
+				else if ( sel > 1 ) // 1% not $F2 or $F3
+					cpu_write_smp_reg_( a, rel_time, i );
+			}
+		}
+		#else
+			WRITE_DP( 0, data, a );
+		#endif
+		goto loop;
+	
+#define CASE( n )   case n:
+
+// Define common address modes based on opcode for immediate mode. Execution
+// ends with data set to the address of the operand.
+#define ADDR_MODES_( op )\
+	CASE( op - 0x02 ) /* (X) */\
+		data = x + dp;\
+		pc--;\
+		goto end_##op;\
+	CASE( op + 0x0F ) /* (dp)+Y */\
+		data = READ_PROG16( data + dp ) + y;\
+		goto end_##op;\
+	CASE( op - 0x01 ) /* (dp+X) */\
+		data = READ_PROG16( ((uint8_t) (data + x)) + dp );\
+		goto end_##op;\
+	CASE( op + 0x0E ) /* abs+Y */\
+		data += y;\
+		goto abs_##op;\
+	CASE( op + 0x0D ) /* abs+X */\
+		data += x;\
+	CASE( op - 0x03 ) /* abs */\
+	abs_##op:\
+		data += 0x100 * READ_PC( ++pc );\
+		goto end_##op;\
+	CASE( op + 0x0C ) /* dp+X */\
+		data = (uint8_t) (data + x);
+
+#define ADDR_MODES_NO_DP( op )\
+	ADDR_MODES_( op )\
+		data += dp;\
+	end_##op:
+
+#define ADDR_MODES( op )\
+	ADDR_MODES_( op )\
+	CASE( op - 0x04 ) /* dp */\
+		data += dp;\
+	end_##op:
+
+// 1. 8-bit Data Transmission Commands. Group I
+
+	ADDR_MODES_NO_DP( 0xE8 ) // MOV A,addr
+		a = nz = READ( 0, data );
+		goto inc_pc_loop;
+	
+	case 0xBF:{// MOV A,(X)+
+		int temp = x + dp;
+		x = (uint8_t) (x + 1);
+		a = nz = READ( -1, temp );
+		goto loop;
+	}
+	
+	case 0xE8: // MOV A,imm
+		a  = data;
+		nz = data;
+		goto inc_pc_loop;
+	
+	case 0xF9: // MOV X,dp+Y
+		data = (uint8_t) (data + y); // fall through
+	case 0xF8: // MOV X,dp
+		READ_DP_TIMER( 0, data, x = nz );
+		goto inc_pc_loop;
+	
+	case 0xE9: // MOV X,abs
+		data = READ_PC16( pc );
+		++pc;
+		data = READ( 0, data ); // fall through
+	case 0xCD: // MOV X,imm
+		x  = data;
+		nz = data;
+		goto inc_pc_loop;
+	
+	case 0xFB: // MOV Y,dp+X
+		data = (uint8_t) (data + x); // fall through
+	case 0xEB: // MOV Y,dp
+		// 70% from timer
+		pc++;
+		READ_DP_TIMER( 0, data, y = nz );
+		goto loop;
+	
+	case 0xEC:{// MOV Y,abs
+		int temp = READ_PC16( pc );
+		pc += 2;
+		READ_TIMER( 0, temp, y = nz );
+		//y = nz = READ( 0, temp );
+		goto loop;
+	}
+	
+	case 0x8D: // MOV Y,imm
+		y  = data;
+		nz = data;
+		goto inc_pc_loop;
+	
+// 2. 8-BIT DATA TRANSMISSION COMMANDS, GROUP 2
+
+	ADDR_MODES_NO_DP( 0xC8 ) // MOV addr,A
+		WRITE( 0, data, a );
+		goto inc_pc_loop;
+	
+	{
+		int temp;
+	case 0xCC: // MOV abs,Y
+		temp = y;
+		goto mov_abs_temp;
+	case 0xC9: // MOV abs,X
+		temp = x;
+	mov_abs_temp:
+		WRITE( 0, READ_PC16( pc ), temp );
+		pc += 2;
+		goto loop;
+	}
+	
+	case 0xD9: // MOV dp+Y,X
+		data = (uint8_t) (data + y); // fall through
+	case 0xD8: // MOV dp,X
+		WRITE( 0, data + dp, x );
+		goto inc_pc_loop;
+	
+	case 0xDB: // MOV dp+X,Y
+		data = (uint8_t) (data + x); // fall through
+	case 0xCB: // MOV dp,Y
+		WRITE( 0, data + dp, y );
+		goto inc_pc_loop;
+
+// 3. 8-BIT DATA TRANSMISSION COMMANDS, GROUP 3.
+	
+	case 0x7D: // MOV A,X
+		a  = x;
+		nz = x;
+		goto loop;
+	
+	case 0xDD: // MOV A,Y
+		a  = y;
+		nz = y;
+		goto loop;
+	
+	case 0x5D: // MOV X,A
+		x  = a;
+		nz = a;
+		goto loop;
+	
+	case 0xFD: // MOV Y,A
+		y  = a;
+		nz = a;
+		goto loop;
+	
+	case 0x9D: // MOV X,SP
+		x = nz = GET_SP();
+		goto loop;
+	
+	case 0xBD: // MOV SP,X
+		SET_SP( x );
+		goto loop;
+	
+	//case 0xC6: // MOV (X),A (handled by MOV addr,A in group 2)
+	
+	case 0xAF: // MOV (X)+,A
+		WRITE_DP( 0, x, a + no_read_before_write  );
+		x = (uint8_t) (x + 1);
+		goto loop;
+	
+// 5. 8-BIT LOGIC OPERATION COMMANDS
+	
+#define LOGICAL_OP( op, func )\
+	ADDR_MODES( op ) /* addr */\
+		data = READ( 0, data );\
+	case op: /* imm */\
+		nz = a func##= data;\
+		goto inc_pc_loop;\
+	{   unsigned addr;\
+	case op + 0x11: /* X,Y */\
+		data = READ_DP( -2, y );\
+		addr = x + dp;\
+		goto addr_##op;\
+	case op + 0x01: /* dp,dp */\
+		data = READ_DP( -3, data );\
+	case op + 0x10:{/*dp,imm*/\
+		uint16_t addr2 = pc + 1;\
+		pc += 2;\
+		addr = READ_PC( addr2 ) + dp;\
+	}\
+	addr_##op:\
+		nz = data func READ( -1, addr );\
+		WRITE( 0, addr, nz );\
+		goto loop;\
+	}
+	
+	LOGICAL_OP( 0x28, & ); // AND
+	
+	LOGICAL_OP( 0x08, | ); // OR
+	
+	LOGICAL_OP( 0x48, ^ ); // EOR
+	
+// 4. 8-BIT ARITHMETIC OPERATION COMMANDS
+
+	ADDR_MODES( 0x68 ) // CMP addr
+		data = READ( 0, data ); // fall through
+	case 0x68: // CMP imm
+		nz = a - data;
+		c = ~nz; // a borrow makes bit 8 of nz set, so carry (bit 8 of c) ends up clear
+		nz &= 0xFF;
+		goto inc_pc_loop;
+	
+	case 0x79: // CMP (X),(Y)
+		data = READ_DP( -2, y );
+		nz = READ_DP( -1, x ) - data;
+		c = ~nz;
+		nz &= 0xFF;
+		goto loop;
+	
+	case 0x69: // CMP dp,dp
+		data = READ_DP( -3, data ); // fall through
+	case 0x78: // CMP dp,imm
+		nz = READ_DP( -1, READ_PC( ++pc ) ) - data;
+		c = ~nz;
+		nz &= 0xFF;
+		goto inc_pc_loop;
+	
+	case 0x3E: // CMP X,dp
+		data += dp;
+		goto cmp_x_addr;
+	case 0x1E: // CMP X,abs
+		data = READ_PC16( pc );
+		pc++;
+	cmp_x_addr:
+		data = READ( 0, data ); // fall through
+	case 0xC8: // CMP X,imm
+		nz = x - data;
+		c = ~nz;
+		nz &= 0xFF;
+		goto inc_pc_loop;
+	
+	case 0x7E: // CMP Y,dp
+		data += dp;
+		goto cmp_y_addr;
+	case 0x5E: // CMP Y,abs
+		data = READ_PC16( pc );
+		pc++;
+	cmp_y_addr:
+		data = READ( 0, data ); // fall through
+	case 0xAD: // CMP Y,imm
+		nz = y - data;
+		c = ~nz;
+		nz &= 0xFF;
+		goto inc_pc_loop;
+	
+	{
+		int addr;
+	case 0xB9: // SBC (x),(y)
+	case 0x99: // ADC (x),(y)
+		pc--; // compensate for inc later
+		data = READ_DP( -2, y );
+		addr = x + dp;
+		goto adc_addr;
+	case 0xA9: // SBC dp,dp
+	case 0x89: // ADC dp,dp
+		data = READ_DP( -3, data ); // fall through
+	case 0xB8: // SBC dp,imm
+	case 0x98: // ADC dp,imm
+		addr = READ_PC( ++pc ) + dp;
+	adc_addr:
+		nz = READ( -1, addr );
+		goto adc_data;
+		
+// catch ADC and SBC together, then decode later based on operand
+#undef CASE
+#define CASE( n ) case n: case (n) + 0x20:
+	ADDR_MODES( 0x88 ) // ADC/SBC addr
+		data = READ( 0, data ); // fall through
+	case 0xA8: // SBC imm
+	case 0x88: // ADC imm
+		addr = -1; // A
+		nz = a;
+	adc_data: {
+		int flags;
+		if ( opcode >= 0xA0 ) // SBC
+			data ^= 0xFF; // SBC is done as ADC of the one's complement
+		
+		flags = data ^ nz;
+		nz += data + (c >> 8 & 1);
+		flags ^= nz;
+		
+		psw = (psw & ~(v40 | h08)) |
+				(flags >> 1 & h08) |
+				((flags + 0x80) >> 2 & v40);
+		c = nz;
+		if ( addr < 0 ) // destination is the accumulator
+		{
+			a = (uint8_t) nz;
+			goto inc_pc_loop;
+		}
+		WRITE( 0, addr, /*(uint8_t)*/ nz );
+		goto inc_pc_loop;
+	}
+	
+	}
+	
+// 6. ADDITION & SUBTRACTION COMMANDS
+
+#define INC_DEC_REG( reg, op )\
+		nz  = reg op;\
+		reg = (uint8_t) nz;\
+		goto loop;
+
+	case 0xBC: INC_DEC_REG( a, + 1 ) // INC A
+	case 0x3D: INC_DEC_REG( x, + 1 ) // INC X
+	case 0xFC: INC_DEC_REG( y, + 1 ) // INC Y
+	
+	case 0x9C: INC_DEC_REG( a, - 1 ) // DEC A
+	case 0x1D: INC_DEC_REG( x, - 1 ) // DEC X
+	case 0xDC: INC_DEC_REG( y, - 1 ) // DEC Y
+
+	case 0x9B: // DEC dp+X
+	case 0xBB: // INC dp+X
+		data = (uint8_t) (data + x); // fall through
+	case 0x8B: // DEC dp
+	case 0xAB: // INC dp
+		data += dp;
+		goto inc_abs;
+	case 0x8C: // DEC abs
+	case 0xAC: // INC abs
+		data = READ_PC16( pc );
+		pc++;
+	inc_abs:
+		nz = (opcode >> 4 & 2) - 1; // +1 for the INC opcodes (0xAx, 0xBx), -1 for the DEC opcodes (0x8x, 0x9x)
+		nz += READ( -1, data );
+		WRITE( 0, data, /*(uint8_t)*/ nz );
+		goto inc_pc_loop;
+	
+// 7. SHIFT, ROTATION COMMANDS
+
+	case 0x5C: // LSR A
+		c = 0; // LSR is ROR with carry cleared; fall through
+	case 0x7C:{// ROR A
+		nz = (c >> 1 & 0x80) | (a >> 1);
+		c = a << 8;
+		a = nz;
+		goto loop;
+	}
+	
+	case 0x1C: // ASL A
+		c = 0; // ASL is ROL with carry cleared; fall through
+	case 0x3C:{// ROL A
+		int temp = c >> 8 & 1;
+		c = a << 1;
+		nz = c | temp;
+		a = (uint8_t) nz;
+		goto loop;
+	}
+	
+	case 0x0B: // ASL dp
+		c = 0;
+		data += dp;
+		goto rol_mem;
+	case 0x1B: // ASL dp+X
+		c = 0; // fall through
+	case 0x3B: // ROL dp+X
+		data = (uint8_t) (data + x); // fall through
+	case 0x2B: // ROL dp
+		data += dp;
+		goto rol_mem;
+	case 0x0C: // ASL abs
+		c = 0; // fall through
+	case 0x2C: // ROL abs
+		data = READ_PC16( pc );
+		pc++;
+	rol_mem:
+		nz = c >> 8 & 1;
+		nz |= (c = READ( -1, data ) << 1);
+		WRITE( 0, data, /*(uint8_t)*/ nz );
+		goto inc_pc_loop;
+	
+	case 0x4B: // LSR dp
+		c = 0;
+		data += dp;
+		goto ror_mem;
+	case 0x5B: // LSR dp+X
+		c = 0; // fall through
+	case 0x7B: // ROR dp+X
+		data = (uint8_t) (data + x); // fall through
+	case 0x6B: // ROR dp
+		data += dp;
+		goto ror_mem;
+	case 0x4C: // LSR abs
+		c = 0; // fall through
+	case 0x6C: // ROR abs
+		data = READ_PC16( pc );
+		pc++;
+	ror_mem: {
+		int temp = READ( -1, data );
+		nz = (c >> 1 & 0x80) | (temp >> 1);
+		c = temp << 8;
+		WRITE( 0, data, nz );
+		goto inc_pc_loop;
+	}
+
+	case 0x9F: // XCN
+		nz = a = (a >> 4) | (uint8_t) (a << 4); // swap the two nibbles of A
+		goto loop;
+
+// 8. 16-BIT TRANSMISSION COMMANDS
+
+	case 0xBA: // MOVW YA,dp
+		a = READ_DP( -2, data );
+		nz = (a & 0x7F) | (a >> 1); // non-zero iff the low byte is non-zero, while keeping bit 7 clear
+		y = READ_DP( 0, (uint8_t) (data + 1) );
+		nz |= y;
+		goto inc_pc_loop;
+	
+	case 0xDA: // MOVW dp,YA
+		WRITE_DP( -1, data, a );
+		WRITE_DP( 0, (uint8_t) (data + 1), y + no_read_before_write  );
+		goto inc_pc_loop;
+	
+// 9. 16-BIT OPERATION COMMANDS
+
+	case 0x3A: // INCW dp
+	case 0x1A:{// DECW dp
+		int temp;
+		// low byte
+		data += dp;
+		temp = READ( -3, data );
+		temp += (opcode >> 4 & 2) - 1; // +1 for INCW, -1 for DECW
+		nz = ((temp >> 1) | temp) & 0x7F;
+		WRITE( -2, data, /*(uint8_t)*/ temp );
+		
+		// high byte
+		data = (uint8_t) (data + 1) + dp; // the high byte's offset wraps within the direct page
+		temp = (uint8_t) ((temp >> 8) + READ( -1, data ));
+		nz |= temp;
+		WRITE( 0, data, temp );
+		
+		goto inc_pc_loop;
+	}
+		
+	case 0x7A: // ADDW YA,dp
+	case 0x9A:{// SUBW YA,dp
+		int lo = READ_DP( -2, data );
+		int hi = READ_DP( 0, (uint8_t) (data + 1) );
+		int result;
+		int flags;
+		
+		if ( opcode == 0x9A ) // SUBW
+		{
+			lo = (lo ^ 0xFF) + 1;
+			hi ^= 0xFF;
+		}
+		
+		lo += a;
+		result = y + hi + (lo >> 8);
+		flags = hi ^ y ^ result;
+		
+		psw = (psw & ~(v40 | h08)) |
+				(flags >> 1 & h08) |
+				((flags + 0x80) >> 2 & v40);
+		c = result;
+		a = (uint8_t) lo;
+		result = (uint8_t) result;
+		y = result;
+		nz = (((lo >> 1) | lo) & 0x7F) | result;
+		
+		goto inc_pc_loop;
+	}
+	
+	case 0x5A: { // CMPW YA,dp
+		int temp = a - READ_DP( -1, data );
+		nz = ((temp >> 1) | temp) & 0x7F;
+		temp = y + (temp >> 8);
+		temp -= READ_DP( 0, (uint8_t) (data + 1) );
+		nz |= temp;
+		c  = ~temp;
+		nz &= 0xFF;
+		goto inc_pc_loop;
+	}
+	
+// 10. MULTIPLICATION & DIVISION COMMANDS
+
+	case 0xCF: { // MUL YA
+		unsigned temp = y * a;
+		a = (uint8_t) temp;
+		nz = ((temp >> 1) | temp) & 0x7F;
+		y = (uint8_t) (temp >> 8);
+		nz |= y;
+		goto loop;
+	}
+	
+	case 0x9E: // DIV YA,X
+	{
+		unsigned ya = y * 0x100 + a;
+		
+		psw &= ~(h08 | v40);
+		
+		if ( y >= x )
+			psw |= v40;
+		
+		if ( (y & 15) >= (x & 15) )
+			psw |= h08;
+		
+		if ( y < x * 2 )
+		{
+			a = ya / x;
+			y = ya - a * x;
+		}
+		else // also taken when x == 0 (y < 0 is never true), so the division by x above never divides by zero
+		{
+			a = 255 - (ya - x * 0x200) / (256 - x);
+			y = x   + (ya - x * 0x200) % (256 - x);
+		}
+		
+		nz = (uint8_t) a;
+		a = (uint8_t) a;
+		y = (uint8_t) y;
+		
+		goto loop;
+	}
+	
+// 11. DECIMAL COMPENSATION COMMANDS
+	
+	case 0xDF: // DAA
+		SUSPICIOUS_OPCODE( "DAA" );
+		if ( a > 0x99 || c & 0x100 )
+		{
+			a += 0x60;
+			c = 0x100;
+		}
+		
+		if ( (a & 0x0F) > 9 || psw & h08 )
+			a += 0x06;
+		
+		nz = a;
+		a = (uint8_t) a;
+		goto loop;
+	
+	case 0xBE: // DAS
+		SUSPICIOUS_OPCODE( "DAS" );
+		if ( a > 0x99 || !(c & 0x100) )
+		{
+			a -= 0x60;
+			c = 0;
+		}
+		
+		if ( (a & 0x0F) > 9 || !(psw & h08) )
+			a -= 0x06;
+		
+		nz = a;
+		a = (uint8_t) a;
+		goto loop;
+	
+// 12. BRANCHING COMMANDS
+
+	case 0x2F: // BRA rel
+		pc += (int8_t) data;
+		goto inc_pc_loop;
+	
+	case 0x30: // BMI
+		BRANCH( (nz & nz_neg_mask) )
+	
+	case 0x10: // BPL
+		BRANCH( !(nz & nz_neg_mask) )
+	
+	case 0xB0: // BCS
+		BRANCH( c & 0x100 )
+	
+	case 0x90: // BCC
+		BRANCH( !(c & 0x100) )
+	
+	case 0x70: // BVS
+		BRANCH( psw & v40 )
+	
+	case 0x50: // BVC
+		BRANCH( !(psw & v40) )
+	
+	#define CBRANCH( cond )\
+	{\
+		pc++;\
+		if ( cond )\
+			goto cbranch_taken_loop;\
+		rel_time -= 2; /* not taken: give back 2 clocks */\
+		goto inc_pc_loop;\
+	}
+	
+	case 0x03: // BBS dp.bit,rel
+	case 0x23:
+	case 0x43:
+	case 0x63:
+	case 0x83:
+	case 0xA3:
+	case 0xC3:
+	case 0xE3:
+		CBRANCH( READ_DP( -4, data ) >> (opcode >> 5) & 1 ) // bit number is the top 3 bits of the opcode
+	
+	case 0x13: // BBC dp.bit,rel
+	case 0x33:
+	case 0x53:
+	case 0x73:
+	case 0x93:
+	case 0xB3:
+	case 0xD3:
+	case 0xF3:
+		CBRANCH( !(READ_DP( -4, data ) >> (opcode >> 5) & 1) )
+	
+	case 0xDE: // CBNE dp+X,rel
+		data = (uint8_t) (data + x);
+		// fall through
+	case 0x2E:{// CBNE dp,rel
+		int temp;
+		// 61% from timer
+		READ_DP_TIMER( -4, data, temp );
+		CBRANCH( temp != a )
+	}
+	
+	case 0x6E: { // DBNZ dp,rel
+		unsigned temp = READ_DP( -4, data ) - 1;
+		WRITE_DP( -3, (uint8_t) data, /*(uint8_t)*/ temp + no_read_before_write  );
+		CBRANCH( temp )
+	}
+	
+	case 0xFE: // DBNZ Y,rel
+		y = (uint8_t) (y - 1);
+		BRANCH( y )
+	
+	case 0x1F: // JMP [abs+X]
+		SET_PC( READ_PC16( pc ) + x ); // pc now points at the pointer; the JMP abs code below loads the target from it
+		// fall through
+	case 0x5F: // JMP abs
+		SET_PC( READ_PC16( pc ) );
+		goto loop;
+	
+// 13. SUB-ROUTINE CALL RETURN COMMANDS
+	
+	case 0x0F:{// BRK
+		int temp;
+		int ret_addr = GET_PC();
+		SUSPICIOUS_OPCODE( "BRK" );
+		SET_PC( READ_PROG16( 0xFFDE ) ); // vector address verified
+		PUSH16( ret_addr );
+		GET_PSW( temp ); // the pushed PSW is captured before the B/I flags are changed below
+		psw = (psw | b10) & ~i04;
+		PUSH( temp );
+		goto loop;
+	}
+	
+	case 0x4F:{// PCALL offset
+		int ret_addr = GET_PC() + 1;
+		SET_PC( 0xFF00 | data );
+		PUSH16( ret_addr );
+		goto loop;
+	}
+	
+	case 0x01: // TCALL n
+	case 0x11:
+	case 0x21:
+	case 0x31:
+	case 0x41:
+	case 0x51:
+	case 0x61:
+	case 0x71:
+	case 0x81:
+	case 0x91:
+	case 0xA1:
+	case 0xB1:
+	case 0xC1:
+	case 0xD1:
+	case 0xE1:
+	case 0xF1: {
+		int ret_addr = GET_PC();
+		SET_PC( READ_PROG16( 0xFFDE - (opcode >> 3) ) ); // opcode is n * 0x10 + 1, so opcode >> 3 == 2 * n: one 2-byte vector per n
+		PUSH16( ret_addr );
+		goto loop;
+	}
+	
+// 14. STACK OPERATION COMMANDS
+
+	{
+		int temp;
+		uint8_t l, h;
+	case 0x7F: // RET1
+		POP (temp);
+		POP (l);
+		POP (h);
+		SET_PC( l | (h << 8) );
+		goto set_psw;
+	case 0x8E: // POP PSW
+		POP( temp );
+	set_psw:
+		SET_PSW( temp );
+		goto loop;
+	}
+	
+	case 0x0D: { // PUSH PSW
+		int temp;
+		GET_PSW( temp );
+		PUSH( temp );
+		goto loop;
+	}
+
+	case 0x2D: // PUSH A
+		PUSH( a );
+		goto loop;
+	
+	case 0x4D: // PUSH X
+		PUSH( x );
+		goto loop;
+	
+	case 0x6D: // PUSH Y
+		PUSH( y );
+		goto loop;
+	
+	case 0xAE: // POP A
+		POP( a );
+		goto loop;
+	
+	case 0xCE: // POP X
+		POP( x );
+		goto loop;
+	
+	case 0xEE: // POP Y
+		POP( y );
+		goto loop;
+	
+// 15. BIT OPERATION COMMANDS
+
+	case 0x02: // SET1
+	case 0x22:
+	case 0x42:
+	case 0x62:
+	case 0x82:
+	case 0xA2:
+	case 0xC2:
+	case 0xE2:
+	case 0x12: // CLR1
+	case 0x32:
+	case 0x52:
+	case 0x72:
+	case 0x92:
+	case 0xB2:
+	case 0xD2:
+	case 0xF2: {
+		int bit = 1 << (opcode >> 5); // bit number is the top 3 bits of the opcode
+		int mask = ~bit;
+		if ( opcode & 0x10 ) // CLR1: clear the bit instead of setting it
+			bit = 0;
+		data += dp;
+		WRITE( 0, data, (READ( -1, data ) & mask) | bit );
+		goto inc_pc_loop;
+	}
+		
+	case 0x0E: // TSET1 abs
+	case 0x4E: // TCLR1 abs
+		data = READ_PC16( pc );
+		pc += 2;
+		{
+			unsigned temp = READ( -2, data );
+			nz = (uint8_t) (a - temp);
+			temp &= ~a;
+			if ( opcode == 0x0E ) // TSET1: set the bits of A instead of clearing them
+				temp |= a;
+			WRITE( 0, data, temp );
+		}
+		goto loop;
+	
+	case 0x4A: // AND1 C,mem.bit
+		c &= MEM_BIT( 0 );
+		pc += 2;
+		goto loop;
+	
+	case 0x6A: // AND1 C,/mem.bit
+		c &= ~MEM_BIT( 0 );
+		pc += 2;
+		goto loop;
+	
+	case 0x0A: // OR1 C,mem.bit
+		c |= MEM_BIT( -1 );
+		pc += 2;
+		goto loop;
+	
+	case 0x2A: // OR1 C,/mem.bit
+		c |= ~MEM_BIT( -1 );
+		pc += 2;
+		goto loop;
+	
+	case 0x8A: // EOR1 C,mem.bit
+		c ^= MEM_BIT( -1 );
+		pc += 2;
+		goto loop;
+	
+	case 0xEA: // NOT1 mem.bit
+		data = READ_PC16( pc );
+		pc += 2;
+		{
+			unsigned temp = READ( -1, data & 0x1FFF );
+			temp ^= 1 << (data >> 13);
+			WRITE( 0, data & 0x1FFF, temp );
+		}
+		goto loop;
+	
+	case 0xCA: // MOV1 mem.bit,C
+		data = READ_PC16( pc );
+		pc += 2;
+		{
+			unsigned temp = READ( -2, data & 0x1FFF );
+			unsigned bit = data >> 13;
+			temp = (temp & ~(1 << bit)) | ((c >> 8 & 1) << bit);
+			WRITE( 0, data & 0x1FFF, temp + no_read_before_write  );
+		}
+		goto loop;
+	
+	case 0xAA: // MOV1 C,mem.bit
+		c = MEM_BIT( 0 );
+		pc += 2;
+		goto loop;
+	
+// 16. PROGRAM PSW FLAG OPERATION COMMANDS
+
+	case 0x60: // CLRC
+		c = 0;
+		goto loop;
+		
+	case 0x80: // SETC
+		c = ~0;
+		goto loop;
+	
+	case 0xED: // NOTC
+		c ^= 0x100;
+		goto loop;
+		
+	case 0xE0: // CLRV
+		psw &= ~(v40 | h08);
+		goto loop;
+	
+	case 0x20: // CLRP
+		dp = 0;
+		goto loop;
+	
+	case 0x40: // SETP
+		dp = 0x100;
+		goto loop;
+	
+	case 0xA0: // EI
+		SUSPICIOUS_OPCODE( "EI" );
+		psw |= i04;
+		goto loop;
+	
+	case 0xC0: // DI
+		SUSPICIOUS_OPCODE( "DI" );
+		psw &= ~i04;
+		goto loop;
+	
+// 17. OTHER COMMANDS
+
+	case 0x00: // NOP
+		goto loop;
+	
+	case 0xFF:{// STOP
+		// handle PC wrap-around
+		if ( pc == 0x0000 ) // pc was already incremented, so the opcode byte was at 0xFFFF
+		{
+			debug_printf( "SPC: PC wrapped around\n" );
+			goto loop;
+		}
+	}
+	// fall through
+	case 0xEF: // SLEEP
+		SUSPICIOUS_OPCODE( "STOP/SLEEP" );
+		--pc; // leave pc on the STOP/SLEEP opcode itself
+		rel_time = 0;
+		m.cpu_error = "SPC emulation error";
+		goto stop;
+	} // switch
+	
+	assert( 0 ); // catch any unhandled instructions
+}
+out_of_time:
+	rel_time -= m.cycle_table [ ram [pc] ]; // undo partial execution of opcode
+stop:
+	
+	// Uncache registers
+	m.cpu_regs.pc = (uint16_t) GET_PC();
+	m.cpu_regs.sp = ( uint8_t) GET_SP();
+	m.cpu_regs.a  = ( uint8_t) a;
+	m.cpu_regs.x  = ( uint8_t) x;
+	m.cpu_regs.y  = ( uint8_t) y;
+	{
+		int temp;
+		GET_PSW( temp );
+		m.cpu_regs.psw = (uint8_t) temp;
+	}
+}
+SPC_CPU_RUN_FUNC_END // shifts the times back by the final rel_time and returns &REGS [r_cpuio0] (macro defined in Snes_Spc.cpp)
diff --git a/libraries/game-music-emu/gme/Spc_Dsp.cpp b/libraries/game-music-emu/gme/Spc_Dsp.cpp
new file mode 100644
index 000000000..51556434d
--- /dev/null
+++ b/libraries/game-music-emu/gme/Spc_Dsp.cpp
@@ -0,0 +1,704 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Spc_Dsp.h"
+
+#include "blargg_endian.h"
+#include <string.h>
+
+/* Copyright (C) 2007 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+#ifdef BLARGG_ENABLE_OPTIMIZER
+	#include BLARGG_ENABLE_OPTIMIZER
+#endif
+
+#if INT_MAX < 0x7FFFFFFF
+	#error "Requires that int type have at least 32 bits"
+#endif
+
+
+// TODO: add to blargg_endian.h
+#define GET_LE16SA( addr )      ((int16_t) GET_LE16( addr ))
+#define GET_LE16A( addr )       GET_LE16( addr )
+#define SET_LE16A( addr, data ) SET_LE16( addr, data )
+
+static uint8_t const initial_regs [Spc_Dsp::register_count] =
+{
+	0x45,0x8B,0x5A,0x9A,0xE4,0x82,0x1B,0x78,0x00,0x00,0xAA,0x96,0x89,0x0E,0xE0,0x80,
+	0x2A,0x49,0x3D,0xBA,0x14,0xA0,0xAC,0xC5,0x00,0x00,0x51,0xBB,0x9C,0x4E,0x7B,0xFF,
+	0xF4,0xFD,0x57,0x32,0x37,0xD9,0x42,0x22,0x00,0x00,0x5B,0x3C,0x9F,0x1B,0x87,0x9A,
+	0x6F,0x27,0xAF,0x7B,0xE5,0x68,0x0A,0xD9,0x00,0x00,0x9A,0xC5,0x9C,0x4E,0x7B,0xFF,
+	0xEA,0x21,0x78,0x4F,0xDD,0xED,0x24,0x14,0x00,0x00,0x77,0xB1,0xD1,0x36,0xC1,0x67,
+	0x52,0x57,0x46,0x3D,0x59,0xF4,0x87,0xA4,0x00,0x00,0x7E,0x44,0x9C,0x4E,0x7B,0xFF,
+	0x75,0xF5,0x06,0x97,0x10,0xC3,0x24,0xBB,0x00,0x00,0x7B,0x7A,0xE0,0x60,0x12,0x0F,
+	0xF7,0x74,0x1C,0xE5,0x39,0x3D,0x73,0xC1,0x00,0x00,0x7A,0xB3,0xFF,0x4E,0x7B,0xFF
+};
+
+// Saturates io to the signed 16-bit range. Equivalent to:
+// if ( io < -32768 ) io = -32768;
+// if ( io >  32767 ) io =  32767;
+// io is evaluated more than once, so pass a plain int variable. The
+// replacement value relies on >> of a negative int being an arithmetic
+// shift: (io >> 31) is -1 for negative io, giving -1 ^ 0x7FFF == -0x8000.
+#define CLAMP16( io )\
+{\
+	if ( (int16_t) io != io )\
+		io = (io >> 31) ^ 0x7FFF;\
+}
+
+// Access global DSP register
+#define REG(n)      m.regs [r_##n]
+
+// Access voice DSP register
+#define VREG(r,n)   r [v_##n]
+
+#define WRITE_SAMPLES( l, r, out ) \
+{\
+	out [0] = l;\
+	out [1] = r;\
+	out += 2;\
+	if ( out >= m.out_end )\
+	{\
+		check( out == m.out_end );\
+		check( m.out_end != &m.extra [extra_size] || \
+			(m.extra <= m.out_begin && m.extra < &m.extra [extra_size]) );\
+		out       = m.extra;\
+		m.out_end = &m.extra [extra_size];\
+	}\
+}\
+
+// Sets the buffer that run() writes interleaved left/right samples into.
+// out:  destination buffer, or NULL to use the small internal 'extra' buffer
+// size: capacity of out in samples; must be even
+// An empty buffer is treated like NULL, as the header documents.
+// WRITE_SAMPLES() stores a sample pair *before* it checks for the end of the
+// buffer, so pointing it at a zero-size caller buffer would write past it.
+void Spc_Dsp::set_output( sample_t* out, int size )
+{
+	require( (size & 1) == 0 ); // must be even
+	if ( !out || size <= 0 )
+	{
+		out  = m.extra;
+		size = extra_size;
+	}
+	m.out_begin = out;
+	m.out       = out;
+	m.out_end   = out + size;
+}
+
+// Volume registers and efb are signed! Easy to forget int8_t cast.
+// Prefixes are to avoid accidental use of locals with same names.
+
+// Interleaved gauss table (to improve cache coherency)
+// interleved_gauss [i] = gauss [(i & 1) * 256 + 255 - (i >> 1 & 0xFF)]
+static short const interleved_gauss [512] =
+{
+ 370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303,
+ 339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299,
+ 311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292,
+ 283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282,
+ 257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269,
+ 233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253,
+ 210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234,
+ 188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213,
+ 168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190,
+ 150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164,
+ 132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136,
+ 117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106,
+ 102,1102, 100,1098,  99,1094,  97,1090,  95,1086,  94,1082,  92,1078,  90,1074,
+  89,1070,  87,1066,  86,1061,  84,1057,  83,1053,  81,1049,  80,1045,  78,1040,
+  77,1036,  76,1032,  74,1027,  73,1023,  71,1019,  70,1014,  69,1010,  67,1005,
+  66,1001,  65, 997,  64, 992,  62, 988,  61, 983,  60, 978,  59, 974,  58, 969,
+  56, 965,  55, 960,  54, 955,  53, 951,  52, 946,  51, 941,  50, 937,  49, 932,
+  48, 927,  47, 923,  46, 918,  45, 913,  44, 908,  43, 904,  42, 899,  41, 894,
+  40, 889,  39, 884,  38, 880,  37, 875,  36, 870,  36, 865,  35, 860,  34, 855,
+  33, 851,  32, 846,  32, 841,  31, 836,  30, 831,  29, 826,  29, 821,  28, 816,
+  27, 811,  27, 806,  26, 802,  25, 797,  24, 792,  24, 787,  23, 782,  23, 777,
+  22, 772,  21, 767,  21, 762,  20, 757,  20, 752,  19, 747,  19, 742,  18, 737,
+  17, 732,  17, 728,  16, 723,  16, 718,  15, 713,  15, 708,  15, 703,  14, 698,
+  14, 693,  13, 688,  13, 683,  12, 678,  12, 674,  11, 669,  11, 664,  11, 659,
+  10, 654,  10, 649,  10, 644,   9, 640,   9, 635,   9, 630,   8, 625,   8, 620,
+   8, 615,   7, 611,   7, 606,   7, 601,   6, 596,   6, 592,   6, 587,   6, 582,
+   5, 577,   5, 573,   5, 568,   5, 563,   4, 559,   4, 554,   4, 550,   4, 545,
+   4, 540,   3, 536,   3, 531,   3, 527,   3, 522,   3, 517,   2, 513,   2, 508,
+   2, 504,   2, 499,   2, 495,   2, 491,   2, 486,   1, 482,   1, 477,   1, 473,
+   1, 469,   1, 464,   1, 460,   1, 456,   1, 451,   1, 447,   1, 443,   1, 439,
+   0, 434,   0, 430,   0, 426,   0, 422,   0, 418,   0, 414,   0, 410,   0, 405,
+   0, 401,   0, 397,   0, 393,   0, 389,   0, 385,   0, 381,   0, 378,   0, 374,
+};
+
+
+//// Counters
+
+#define RATE( rate, div )\
+	(rate >= div ? rate / div * 8 - 1 : rate - 1)
+
+static unsigned const counter_mask [32] =
+{
+	RATE(   2,2), RATE(2048,4), RATE(1536,3),
+	RATE(1280,5), RATE(1024,4), RATE( 768,3),
+	RATE( 640,5), RATE( 512,4), RATE( 384,3),
+	RATE( 320,5), RATE( 256,4), RATE( 192,3),
+	RATE( 160,5), RATE( 128,4), RATE(  96,3),
+	RATE(  80,5), RATE(  64,4), RATE(  48,3),
+	RATE(  40,5), RATE(  32,4), RATE(  24,3),
+	RATE(  20,5), RATE(  16,4), RATE(  12,3),
+	RATE(  10,5), RATE(   8,4), RATE(   6,3),
+	RATE(   5,5), RATE(   4,4), RATE(   3,3),
+	              RATE(   2,4),
+	              RATE(   1,4)
+};
+#undef RATE
+
+// Puts the rate counters into their starting phase relationship and builds
+// the table that maps each of the 32 rate values to the counter driving it.
+inline void Spc_Dsp::init_counter()
+{
+	// counters start out with this synchronization
+	static unsigned const initial [4] = { 1, 0, -0x20u, 0x0B };
+	for ( int c = 0; c < 4; c++ )
+		m.counters [c] = initial [c];
+	
+	// rates 1..31 cycle through counters 2, 1, 3, 2, 1, 3, ...
+	int which = 2;
+	for ( int rate = 1; rate < 32; rate++ )
+	{
+		m.counter_select [rate] = &m.counters [which];
+		which = (which > 1 ? which - 1 : 3);
+	}
+	
+	// rates 0 and 30 override the cycle above
+	m.counter_select [ 0] = &m.counters [0];
+	m.counter_select [30] = &m.counters [2];
+}
+
+// Steps rate counter i by one sample (run() calls this for i = 1, 2 and 3).
+inline void Spc_Dsp::run_counter( int i )
+{
+	int const before = m.counters [i];
+	int after = before - 1;
+	
+	// when the low three bits were clear, step back a further (6 - i)
+	if ( (before & 7) == 0 )
+		after -= 6 - i;
+	
+	m.counters [i] = after;
+}
+
+#define READ_COUNTER( rate )\
+	(*m.counter_select [rate] & counter_mask [rate])
+
+
+//// Emulation
+
+// Runs the DSP for clock_count clocks. One stereo sample pair is produced per
+// 32 clocks; leftover clocks are kept in m.phase for the next call.
+// Per sample this latches KON/KOFF, steps the rate counters and the noise
+// generator, processes all eight voices (pitch, gaussian interpolation,
+// envelope, BRR decoding), runs the echo FIR filter and writes the mixed
+// result through WRITE_SAMPLES().
+void Spc_Dsp::run( int clock_count )
+{
+	int new_phase = m.phase + clock_count;
+	int count = new_phase >> 5;
+	m.phase = new_phase & 31;
+	if ( !count )
+		return;
+	
+	uint8_t* const ram = m.ram;
+	// Base address of the sample directory. Kept as an address rather than a
+	// pointer so that entry lookups can be wrapped to 16 bits (see SAMPLE_PTR).
+	int const dir_addr = REG(dir) * 0x100;
+	int const slow_gaussian = (REG(pmon) >> 1) | REG(non);
+	int const noise_rate = REG(flg) & 0x1F;
+	
+	// Global volume
+	int mvoll = (int8_t) REG(mvoll);
+	int mvolr = (int8_t) REG(mvolr);
+	if ( mvoll * mvolr < m.surround_threshold )
+		mvoll = -mvoll; // eliminate surround
+	
+	do
+	{
+		// KON/KOFF reading
+		if ( (m.every_other_sample ^= 1) != 0 )
+		{
+			m.new_kon &= ~m.kon;
+			m.kon    = m.new_kon;
+			m.t_koff = REG(koff);
+		}
+		
+		run_counter( 1 );
+		run_counter( 2 );
+		run_counter( 3 );
+		
+		// Noise
+		if ( !READ_COUNTER( noise_rate ) )
+		{
+			int feedback = (m.noise << 13) ^ (m.noise << 14);
+			m.noise = (feedback & 0x4000) ^ (m.noise >> 1);
+		}
+		
+		// Voices
+		int pmon_input = 0;
+		int main_out_l = 0;
+		int main_out_r = 0;
+		int echo_out_l = 0;
+		int echo_out_r = 0;
+		voice_t* v = m.voices;
+		uint8_t* v_regs = m.regs;
+		int vbit = 1;
+		do
+		{
+			// Reads entry i (0 = start address, 1 = loop address) of this
+			// voice's sample directory slot. DIR * 0x100 + SRCN * 4 + i * 2 can
+			// reach 0x102FF, so the address is wrapped to 16 bits to stay inside
+			// the 64K RAM block. The address is always even, so the second byte
+			// of the 16-bit read is at most 0xFFFF.
+			#define SAMPLE_PTR(i) GET_LE16A( &ram [(dir_addr + VREG(v_regs,srcn) * 4 + (i) * 2) & 0xFFFF] )
+			
+			int brr_header = ram [v->brr_addr];
+			int kon_delay = v->kon_delay;
+			
+			// Pitch
+			int pitch = GET_LE16A( &VREG(v_regs,pitchl) ) & 0x3FFF;
+			if ( REG(pmon) & vbit )
+				pitch += ((pmon_input >> 5) * pitch) >> 10;
+			
+			// KON phases
+			if ( --kon_delay >= 0 )
+			{
+				v->kon_delay = kon_delay;
+				
+				// Get ready to start BRR decoding on next sample
+				if ( kon_delay == 4 )
+				{
+					v->brr_addr   = SAMPLE_PTR( 0 );
+					v->brr_offset = 1;
+					v->buf_pos    = v->buf;
+					brr_header    = 0; // header is ignored on this sample
+				}
+				
+				// Envelope is never run during KON
+				v->env        = 0;
+				v->hidden_env = 0;
+				
+				// Disable BRR decoding until last three samples
+				v->interp_pos = (kon_delay & 3 ? 0x4000 : 0);
+				
+				// Pitch is never added during KON
+				pitch = 0;
+			}
+			
+			int env = v->env;
+			
+			// Gaussian interpolation
+			{
+				int output = 0;
+				VREG(v_regs,envx) = (uint8_t) (env >> 4);
+				if ( env )
+				{
+					// Make pointers into gaussian based on fractional position between samples
+					int offset = (unsigned) v->interp_pos >> 3 & 0x1FE;
+					short const* fwd = interleved_gauss       + offset;
+					short const* rev = interleved_gauss + 510 - offset; // mirror left half of gaussian
+					
+					int const* in = &v->buf_pos [(unsigned) v->interp_pos >> 12];
+					
+					if ( !(slow_gaussian & vbit) ) // 99%
+					{
+						// Faster approximation when exact sample value isn't necessary for pitch mod
+						output = (fwd [0] * in [0] +
+						          fwd [1] * in [1] +
+						          rev [1] * in [2] +
+						          rev [0] * in [3]) >> 11;
+						output = (output * env) >> 11;
+					}
+					else
+					{
+						output = (int16_t) (m.noise * 2);
+						if ( !(REG(non) & vbit) )
+						{
+							output  = (fwd [0] * in [0]) >> 11;
+							output += (fwd [1] * in [1]) >> 11;
+							output += (rev [1] * in [2]) >> 11;
+							output = (int16_t) output;
+							output += (rev [0] * in [3]) >> 11;
+							
+							CLAMP16( output );
+							output &= ~1;
+						}
+						output = (output * env) >> 11 & ~1;
+					}
+					
+					// Output
+					int l = output * v->volume [0];
+					int r = output * v->volume [1];
+					
+					main_out_l += l;
+					main_out_r += r;
+					
+					if ( REG(eon) & vbit )
+					{
+						echo_out_l += l;
+						echo_out_r += r;
+					}
+				}
+				
+				pmon_input = output;
+				VREG(v_regs,outx) = (uint8_t) (output >> 8);
+			}
+			
+			// Soft reset or end of sample
+			if ( REG(flg) & 0x80 || (brr_header & 3) == 1 )
+			{
+				v->env_mode = env_release;
+				env         = 0;
+			}
+			
+			if ( m.every_other_sample )
+			{
+				// KOFF
+				if ( m.t_koff & vbit )
+					v->env_mode = env_release;
+				
+				// KON
+				if ( m.kon & vbit )
+				{
+					v->kon_delay = 5;
+					v->env_mode  = env_attack;
+					REG(endx) &= ~vbit;
+				}
+			}
+			
+			// Envelope
+			if ( !v->kon_delay )
+			{
+				if ( v->env_mode == env_release ) // 97%
+				{
+					env -= 0x8;
+					v->env = env;
+					if ( env <= 0 )
+					{
+						v->env = 0;
+						goto skip_brr; // no BRR decoding for you!
+					}
+				}
+				else // 3%
+				{
+					int rate;
+					int const adsr0 = VREG(v_regs,adsr0);
+					int env_data = VREG(v_regs,adsr1);
+					if ( adsr0 >= 0x80 ) // 97% ADSR
+					{
+						if ( v->env_mode > env_decay ) // 89%
+						{
+							env--;
+							env -= env >> 8;
+							rate = env_data & 0x1F;
+							
+							// optimized handling
+							v->hidden_env = env;
+							if ( READ_COUNTER( rate ) )
+								goto exit_env;
+							v->env = env;
+							goto exit_env;
+						}
+						else if ( v->env_mode == env_decay )
+						{
+							env--;
+							env -= env >> 8;
+							rate = (adsr0 >> 3 & 0x0E) + 0x10;
+						}
+						else // env_attack
+						{
+							rate = (adsr0 & 0x0F) * 2 + 1;
+							env += rate < 31 ? 0x20 : 0x400;
+						}
+					}
+					else // GAIN
+					{
+						int mode;
+						env_data = VREG(v_regs,gain);
+						mode = env_data >> 5;
+						if ( mode < 4 ) // direct
+						{
+							env = env_data * 0x10;
+							rate = 31;
+						}
+						else
+						{
+							rate = env_data & 0x1F;
+							if ( mode == 4 ) // 4: linear decrease
+							{
+								env -= 0x20;
+							}
+							else if ( mode < 6 ) // 5: exponential decrease
+							{
+								env--;
+								env -= env >> 8;
+							}
+							else // 6,7: linear increase
+							{
+								env += 0x20;
+								if ( mode > 6 && (unsigned) v->hidden_env >= 0x600 )
+									env += 0x8 - 0x20; // 7: two-slope linear increase
+							}
+						}
+					}
+					
+					// Sustain level
+					if ( (env >> 8) == (env_data >> 5) && v->env_mode == env_decay )
+						v->env_mode = env_sustain;
+					
+					v->hidden_env = env;
+					
+					// unsigned cast because linear decrease going negative also triggers this
+					if ( (unsigned) env > 0x7FF )
+					{
+						env = (env < 0 ? 0 : 0x7FF);
+						if ( v->env_mode == env_attack )
+							v->env_mode = env_decay;
+					}
+					
+					if ( !READ_COUNTER( rate ) )
+						v->env = env; // nothing else is controlled by the counter
+				}
+			}
+		exit_env:
+			
+			{
+				// Apply pitch
+				int old_pos = v->interp_pos;
+				int interp_pos = (old_pos & 0x3FFF) + pitch;
+				if ( interp_pos > 0x7FFF )
+					interp_pos = 0x7FFF;
+				v->interp_pos = interp_pos;
+				
+				// BRR decode if necessary
+				if ( old_pos >= 0x4000 )
+				{
+					// Arrange the four input nybbles in 0xABCD order for easy decoding
+					int nybbles = ram [(v->brr_addr + v->brr_offset) & 0xFFFF] * 0x100 +
+							ram [(v->brr_addr + v->brr_offset + 1) & 0xFFFF];
+					
+					// Advance read position
+					int const brr_block_size = 9;
+					int brr_offset = v->brr_offset;
+					if ( (brr_offset += 2) >= brr_block_size )
+					{
+						// Next BRR block
+						int brr_addr = (v->brr_addr + brr_block_size) & 0xFFFF;
+						assert( brr_offset == brr_block_size );
+						if ( brr_header & 1 )
+						{
+							brr_addr = SAMPLE_PTR( 1 );
+							if ( !v->kon_delay )
+								REG(endx) |= vbit;
+						}
+						v->brr_addr = brr_addr;
+						brr_offset  = 1;
+					}
+					v->brr_offset = brr_offset;
+					
+					// Decode
+					
+					// 0: >>1  1: <<0  2: <<1 ... 12: <<11  13-15: >>4 <<11
+					static unsigned char const shifts [16 * 2] = {
+						13,12,12,12,12,12,12,12,12,12,12, 12, 12, 16, 16, 16,
+						 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
+					};
+					int const scale = brr_header >> 4;
+					int const right_shift = shifts [scale];
+					int const left_shift  = shifts [scale + 16];
+					
+					// Write to next four samples in circular buffer
+					int* pos = v->buf_pos;
+					int* end;
+					
+					// Decode four samples
+					for ( end = pos + 4; pos < end; pos++, nybbles <<= 4 )
+					{
+						// Extract upper nybble and scale appropriately. Every cast is
+						// necessary to maintain correctness and avoid undef behavior
+						int s = int16_t(uint16_t((int16_t) nybbles >> right_shift) << left_shift);
+						
+						// Apply IIR filter (8 is the most commonly used)
+						int const filter = brr_header & 0x0C;
+						int const p1 = pos [brr_buf_size - 1];
+						int const p2 = pos [brr_buf_size - 2] >> 1;
+						if ( filter >= 8 )
+						{
+							s += p1;
+							s -= p2;
+							if ( filter == 8 ) // s += p1 * 0.953125 - p2 * 0.46875
+							{
+								s += p2 >> 4;
+								s += (p1 * -3) >> 6;
+							}
+							else // s += p1 * 0.8984375 - p2 * 0.40625
+							{
+								s += (p1 * -13) >> 7;
+								s += (p2 * 3) >> 4;
+							}
+						}
+						else if ( filter ) // s += p1 * 0.46875
+						{
+							s += p1 >> 1;
+							s += (-p1) >> 5;
+						}
+						
+						// Adjust and write sample
+						CLAMP16( s );
+						s = (int16_t) (s * 2);
+						pos [brr_buf_size] = pos [0] = s; // second copy simplifies wrap-around
+					}
+					
+					if ( pos >= &v->buf [brr_buf_size] )
+						pos = v->buf;
+					v->buf_pos = pos;
+				}
+			}
+skip_brr:
+			// Next voice
+			vbit <<= 1;
+			v_regs += 0x10;
+			v++;
+		}
+		while ( vbit < 0x100 );
+		
+		// Echo position
+		int echo_offset = m.echo_offset;
+		uint8_t* const echo_ptr = &ram [(REG(esa) * 0x100 + echo_offset) & 0xFFFF];
+		if ( !echo_offset )
+			m.echo_length = (REG(edl) & 0x0F) * 0x800;
+		echo_offset += 4;
+		if ( echo_offset >= m.echo_length )
+			echo_offset = 0;
+		m.echo_offset = echo_offset;
+		
+		// FIR
+		int echo_in_l = GET_LE16SA( echo_ptr + 0 );
+		int echo_in_r = GET_LE16SA( echo_ptr + 2 );
+		
+		int (*echo_hist_pos) [2] = m.echo_hist_pos;
+		if ( ++echo_hist_pos >= &m.echo_hist [echo_hist_size] )
+			echo_hist_pos = m.echo_hist;
+		m.echo_hist_pos = echo_hist_pos;
+		
+		echo_hist_pos [0] [0] = echo_hist_pos [8] [0] = echo_in_l;
+		echo_hist_pos [0] [1] = echo_hist_pos [8] [1] = echo_in_r;
+		
+		#define CALC_FIR_( i, in )  ((in) * (int8_t) REG(fir + i * 0x10))
+		echo_in_l = CALC_FIR_( 7, echo_in_l );
+		echo_in_r = CALC_FIR_( 7, echo_in_r );
+		
+		#define CALC_FIR( i, ch )   CALC_FIR_( i, echo_hist_pos [i + 1] [ch] )
+		#define DO_FIR( i )\
+			echo_in_l += CALC_FIR( i, 0 );\
+			echo_in_r += CALC_FIR( i, 1 );
+		DO_FIR( 0 );
+		DO_FIR( 1 );
+		DO_FIR( 2 );
+		#if defined (__MWERKS__) && __MWERKS__ < 0x3200
+			__eieio(); // keeps compiler from stupidly "caching" things in memory
+		#endif
+		DO_FIR( 3 );
+		DO_FIR( 4 );
+		DO_FIR( 5 );
+		DO_FIR( 6 );
+		
+		// Echo out
+		if ( !(REG(flg) & 0x20) )
+		{
+			int l = (echo_out_l >> 7) + ((echo_in_l * (int8_t) REG(efb)) >> 14);
+			int r = (echo_out_r >> 7) + ((echo_in_r * (int8_t) REG(efb)) >> 14);
+			
+			// just to help pass more validation tests
+			#if SPC_MORE_ACCURACY
+				l &= ~1;
+				r &= ~1;
+			#endif
+			
+			CLAMP16( l );
+			CLAMP16( r );
+			
+			SET_LE16A( echo_ptr + 0, l );
+			SET_LE16A( echo_ptr + 2, r );
+		}
+		
+		// Sound out
+		int l = (main_out_l * mvoll + echo_in_l * (int8_t) REG(evoll)) >> 14;
+		int r = (main_out_r * mvolr + echo_in_r * (int8_t) REG(evolr)) >> 14;
+		
+		CLAMP16( l );
+		CLAMP16( r );
+		
+		if ( (REG(flg) & 0x40) )
+		{
+			l = 0;
+			r = 0;
+		}
+		
+		sample_t* out = m.out;
+		WRITE_SAMPLES( l, r, out );
+		m.out = out;
+	}
+	while ( --count );
+}
+
+
+//// Setup
+
+// Stores the mute mask and refreshes every voice's enable flag and cached
+// volumes. A set bit i in mask mutes voice i.
+void Spc_Dsp::mute_voices( int mask )
+{
+	m.mute_mask = mask;
+	
+	int voice = 0;
+	while ( voice < voice_count )
+	{
+		// enabled is used as an AND-mask: 0 when muted, -1 (all ones) otherwise
+		int const muted = mask >> voice & 1;
+		m.voices [voice].enabled = muted - 1;
+		update_voice_vol( voice * 0x10 );
+		voice++;
+	}
+}
+
+// Attaches the DSP to the 64K RAM block it works on and brings it to
+// power-on state. Debug builds also verify the compiler/platform behaviour
+// the emulation depends on.
+void Spc_Dsp::init( void* ram_64k )
+{
+	m.ram = (uint8_t*) ram_64k;
+	
+	// Only record the empty mute mask here. Calling mute_voices( 0 ) at this
+	// point would make update_voice_vol() read m.regs and m.surround_threshold
+	// before anything has initialized them. reset() -> load() applies
+	// m.mute_mask to every voice once the registers are valid.
+	m.mute_mask = 0;
+	disable_surround( false );
+	set_output( 0, 0 );
+	reset();
+	
+	#ifndef NDEBUG
+		// be sure this sign-extends
+		assert( (int16_t) 0x8000 == -0x8000 );
+		
+		// be sure right shift preserves sign
+		assert( (-1 >> 1) == -1 );
+		
+		// check clamp macro
+		int i;
+		i = +0x8000; CLAMP16( i ); assert( i == +0x7FFF );
+		i = -0x8001; CLAMP16( i ); assert( i == -0x8000 );
+		
+		blargg_verify_byte_order();
+	#endif
+}
+
+// State reset shared by soft_reset() and load(): timing phase, echo
+// position/history, noise generator seed and rate counters.
+void Spc_Dsp::soft_reset_common()
+{
+	require( m.ram ); // init() must have been called already
+	
+	// timing
+	m.phase              = 0;
+	m.every_other_sample = 1;
+	
+	// echo
+	m.echo_offset   = 0;
+	m.echo_hist_pos = m.echo_hist;
+	
+	// noise generator
+	m.noise = 0x4000;
+	
+	init_counter();
+}
+
+// Soft reset: writes 0xE0 to FLG, then resets the common internal state.
+// run() tests FLG bit 0x80 as soft reset (voices forced to release), bit 0x40
+// to zero the output, and bit 0x20 to skip writing to the echo buffer.
+void Spc_Dsp::soft_reset()
+{
+	REG(flg) = 0xE0;
+	soft_reset_common();
+}
+
+// Resets the DSP and initializes its registers from regs.
+void Spc_Dsp::load( uint8_t const regs [register_count] )
+{
+	memcpy( m.regs, regs, sizeof m.regs );
+	// clear all emulation state that follows the registers, up to (not including) ram
+	memset( &m.regs [register_count], 0, offsetof (state_t,ram) - register_count );
+	
+	// Internal state
+	for ( int i = 0; i < voice_count; i++ )
+	{
+		voice_t* const voice = &m.voices [i];
+		voice->buf_pos    = voice->buf;
+		voice->brr_offset = 1;
+	}
+	m.new_kon = REG(kon);
+	
+	// the memset above cleared the per-voice mute state; re-apply it
+	mute_voices( m.mute_mask );
+	soft_reset_common();
+}
+
+// Resets the DSP by loading the built-in initial_regs register image.
+void Spc_Dsp::reset()
+{
+	load( initial_regs );
+}
diff --git a/libraries/game-music-emu/gme/Spc_Dsp.h b/libraries/game-music-emu/gme/Spc_Dsp.h
new file mode 100644
index 000000000..b364f0845
--- /dev/null
+++ b/libraries/game-music-emu/gme/Spc_Dsp.h
@@ -0,0 +1,207 @@
+// Fast SNES SPC-700 DSP emulator (about 3x speed of accurate one)
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef SPC_DSP_H
+#define SPC_DSP_H
+
+#include "blargg_common.h"
+
+// Emulator for the SNES sound DSP: eight voices plus echo, driven by a
+// 128-byte register file and a 64K RAM block supplied through init().
+struct Spc_Dsp {
+public:
+// Setup
+	
+	// Initializes DSP and has it use the 64K RAM provided
+	void init( void* ram_64k );
+
+	// Sets destination for output samples. If out is NULL or out_size is 0,
+	// doesn't generate any.
+	typedef short sample_t;
+	void set_output( sample_t* out, int out_size );
+
+	// Number of samples written to output since it was last set, always
+	// a multiple of 2. Undefined if more samples were generated than
+	// output buffer could hold.
+	int sample_count() const;
+
+// Emulation
+	
+	// Resets DSP to power-on state
+	void reset();
+
+	// Emulates pressing reset switch on SNES
+	void soft_reset();
+	
+	// Reads/writes DSP registers. For accuracy, you must first call spc_run_dsp()
+	// to catch the DSP up to present.
+	int  read ( int addr ) const;
+	void write( int addr, int data );
+
+	// Runs DSP for specified number of clocks (~1024000 per second). Every 32 clocks
+	// a pair of samples is generated.
+	void run( int clock_count );
+
+// Sound control
+
+	// Mutes voices corresponding to non-zero bits in mask (overrides VxVOL with 0).
+	// Reduces emulation accuracy.
+	enum { voice_count = 8 };
+	void mute_voices( int mask );
+
+	// If true, prevents channels and global volumes from being phase-negated
+	void disable_surround( bool disable = true );
+
+// State
+	
+	// Resets DSP and uses supplied values to initialize registers
+	enum { register_count = 128 };
+	void load( uint8_t const regs [register_count] );
+
+// DSP register addresses
+
+	// Global registers
+	enum {
+	    r_mvoll = 0x0C, r_mvolr = 0x1C,
+	    r_evoll = 0x2C, r_evolr = 0x3C,
+	    r_kon   = 0x4C, r_koff  = 0x5C,
+	    r_flg   = 0x6C, r_endx  = 0x7C,
+	    r_efb   = 0x0D, r_pmon  = 0x2D,
+	    r_non   = 0x3D, r_eon   = 0x4D,
+	    r_dir   = 0x5D, r_esa   = 0x6D,
+	    r_edl   = 0x7D,
+	    r_fir   = 0x0F // 8 coefficients at 0x0F, 0x1F ... 0x7F
+	};
+
+	// Voice registers (offsets within a voice's 0x10-byte register group)
+	enum {
+		v_voll   = 0x00, v_volr   = 0x01,
+		v_pitchl = 0x02, v_pitchh = 0x03,
+		v_srcn   = 0x04, v_adsr0  = 0x05,
+		v_adsr1  = 0x06, v_gain   = 0x07,
+		v_envx   = 0x08, v_outx   = 0x09
+	};
+
+public:
+	// Internal overflow buffer that output continues into once the buffer
+	// given to set_output() is full (also used when no buffer is set)
+	enum { extra_size = 16 };
+	sample_t* extra()               { return m.extra; }
+	sample_t const* out_pos() const { return m.out; }
+public:
+	BLARGG_DISABLE_NOTHROW
+	
+	enum { echo_hist_size = 8 };
+	
+	enum env_mode_t { env_release, env_attack, env_decay, env_sustain };
+	enum { brr_buf_size = 12 };
+	struct voice_t
+	{
+		int buf [brr_buf_size*2];// decoded samples (twice the size to simplify wrap handling)
+		int* buf_pos;           // place in buffer where next samples will be decoded
+		int interp_pos;         // relative fractional position in sample (0x1000 = 1.0)
+		int brr_addr;           // address of current BRR block
+		int brr_offset;         // current decoding offset in BRR block
+		int kon_delay;          // KON delay/current setup phase
+		env_mode_t env_mode;
+		int env;                // current envelope level
+		int hidden_env;         // used by GAIN mode 7, very obscure quirk
+		int volume [2];         // copy of volume from DSP registers, with surround disabled
+		int enabled;            // -1 if enabled, 0 if muted
+	};
+private:
+	// Everything from the end of regs up to (but not including) ram is
+	// cleared by load(); members from ram onwards survive a load/reset.
+	struct state_t
+	{
+		uint8_t regs [register_count];
+		
+		// Echo history keeps most recent 8 samples (twice the size to simplify wrap handling)
+		int echo_hist [echo_hist_size * 2] [2];
+		int (*echo_hist_pos) [2]; // &echo_hist [0 to 7]
+		
+		int every_other_sample; // toggles every sample
+		int kon;                // KON value when last checked
+		int noise;
+		int echo_offset;        // offset from ESA in echo buffer
+		int echo_length;        // number of bytes that echo_offset will stop at
+		int phase;              // next clock cycle to run (0-31)
+		unsigned counters [4];
+		
+		int new_kon;
+		int t_koff;
+		
+		voice_t voices [voice_count];
+		
+		unsigned* counter_select [32];
+		
+		// non-emulation state
+		uint8_t* ram; // 64K shared RAM between DSP and SMP
+		int mute_mask;
+		int surround_threshold;
+		sample_t* out;
+		sample_t* out_end;
+		sample_t* out_begin;
+		sample_t extra [extra_size];
+	};
+	state_t m;
+	
+	void init_counter();
+	void run_counter( int );
+	void soft_reset_common();
+	void write_outline( int addr, int data );
+	void update_voice_vol( int addr );
+};
+
+#include <assert.h>
+
+// Distance, in samples, from the start of the current output buffer to the
+// current write position.
+inline int Spc_Dsp::sample_count() const
+{
+	return m.out - m.out_begin;
+}
+
+// Returns the current value of DSP register addr (0 .. register_count - 1).
+inline int Spc_Dsp::read( int addr ) const
+{
+	// the unsigned compare rejects negative addresses as well as too-large ones
+	assert( (unsigned) addr < register_count );
+	
+	uint8_t const value = m.regs [addr];
+	return value;
+}
+
+// Recomputes the cached left/right volume of the voice whose register group
+// starts at addr (a multiple of 0x10), applying surround removal and muting.
+inline void Spc_Dsp::update_voice_vol( int addr )
+{
+	voice_t& voice = m.voices [addr >> 4];
+	int left  = (int8_t) m.regs [addr + v_voll];
+	int right = (int8_t) m.regs [addr + v_volr];
+	
+	if ( left * right < m.surround_threshold )
+	{
+		// signs differ, so flip those that are negative
+		// (x >> 7 is -1 for negative x, and x ^ -1 == ~x)
+		left  ^= left  >> 7;
+		right ^= right >> 7;
+	}
+	
+	// enabled is 0 when the voice is muted and -1 (all ones) otherwise
+	voice.volume [0] = left  & voice.enabled;
+	voice.volume [1] = right & voice.enabled;
+}
+
+// Stores data in DSP register addr and applies the side effects some
+// registers have: voice volume writes refresh the cached volumes, KON writes
+// are latched into new_kon, and ENDX always reads back as 0 after a write.
+inline void Spc_Dsp::write( int addr, int data )
+{
+	assert( (unsigned) addr < register_count );
+	
+	m.regs [addr] = (uint8_t) data;
+	switch ( addr & 0x0F )
+	{
+	case v_voll:
+	case v_volr: // voice volumes
+		update_voice_vol( addr & ~0x0F );
+		break;
+	
+	case 0x0C:
+		if ( addr == r_kon )
+			m.new_kon = (uint8_t) data;
+		else if ( addr == r_endx ) // always cleared, regardless of data written
+			m.regs [r_endx] = 0;
+		break;
+	}
+}
+
+// Chooses the threshold that left*right volume products are compared against
+// when deciding whether to remove phase negation: 0 when surround is
+// disabled, -0x4000 otherwise.
+inline void Spc_Dsp::disable_surround( bool disable )
+{
+	if ( disable )
+		m.surround_threshold = 0;
+	else
+		m.surround_threshold = -0x4000;
+}
+
+#define SPC_NO_COPY_STATE_FUNCS 1
+
+#define SPC_LESS_ACCURATE 1
+
+#endif
diff --git a/libraries/game-music-emu/gme/Spc_Emu.cpp b/libraries/game-music-emu/gme/Spc_Emu.cpp
new file mode 100644
index 000000000..0f45d8739
--- /dev/null
+++ b/libraries/game-music-emu/gme/Spc_Emu.cpp
@@ -0,0 +1,358 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Spc_Emu.h"
+
+#include "blargg_endian.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Copyright (C) 2004-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+// TODO: support Spc_Filter's bass
+
+// Registers the emulator type, the eight voice names and the default gain.
+Spc_Emu::Spc_Emu()
+{
+	static const char* const names [Snes_Spc::voice_count] = {
+		"DSP 1", "DSP 2", "DSP 3", "DSP 4", "DSP 5", "DSP 6", "DSP 7", "DSP 8"
+	};
+	
+	set_type( gme_spc_type );
+	set_voice_names( names );
+	set_gain( 1.4 );
+}
+
+// Intentionally empty: no explicit cleanup is performed here.
+Spc_Emu::~Spc_Emu() { }
+
+// Track info
+
+long const trailer_offset = 0x10200;
+
+// Pointer to the data at trailer_offset in the loaded file, or to the end of
+// the file if it is shorter than that.
+byte const* Spc_Emu::trailer() const
+{
+	long const offset = min( file_size, trailer_offset );
+	return &file_data [offset];
+}
+
+// Number of bytes in the file beyond trailer_offset; never negative.
+long Spc_Emu::trailer_size() const
+{
+	long const extra = file_size - trailer_offset;
+	return max( 0L, extra );
+}
+
+// Parses an "xid6" extended tag block and copies the fields it recognizes
+// into out.
+// begin: start of the block, which must begin with the "xid6" signature
+// size:  number of bytes available at begin
+// Blocks that would run past the end of the data stop the parse. The year
+// (block 0x14) and copyright text (block 0x13) are combined into
+// out->copyright as "YYYY text".
+static void get_spc_xid6( byte const* begin, long size, track_info_t* out )
+{
+	// header
+	byte const* end = begin + size;
+	if ( size < 8 || memcmp( begin, "xid6", 4 ) )
+	{
+		check( false );
+		return;
+	}
+	
+	// Declared size of the tag data, read from a 32-bit little-endian field.
+	// Kept unsigned: where long is 32 bits, a value >= 0x80000000 stored in a
+	// long would turn negative and move 'end' to before 'in' below.
+	unsigned long const info_size = get_le32( begin + 4 );
+	byte const* in = begin + 8;
+	if ( (unsigned long) (end - in) > info_size )
+	{
+		debug_printf( "Extra data after SPC xid6 info\n" );
+		end = in + info_size;
+	}
+	
+	int year = 0;
+	char copyright [256 + 5]; // room for "YYYY " in front of the text
+	int copyright_len = 0;
+	int const year_len = 5;
+	
+	while ( end - in >= 4 )
+	{
+		// header
+		int id   = in [0];
+		int data = in [3] * 0x100 + in [2];
+		int type = in [1];
+		int len  = type ? data : 0; // type 0 keeps its value in the header itself
+		in += 4;
+		if ( len > end - in )
+		{
+			check( false );
+			break; // block goes past end of data
+		}
+		
+		// handle specific block types
+		char* field = 0;
+		switch ( id )
+		{
+			case 0x01: field = out->song;    break;
+			case 0x02: field = out->game;    break;
+			case 0x03: field = out->author;  break;
+			case 0x04: field = out->dumper;  break;
+			case 0x07: field = out->comment; break;
+			case 0x14: year = data;          break;
+			
+			//case 0x30: // intro length
+			// Many SPCs have intro length set wrong for looped tracks, making it useless
+			/*
+			case 0x30:
+				check( len == 4 );
+				if ( len >= 4 )
+				{
+					out->intro_length = get_le32( in ) / 64;
+					if ( out->length > 0 )
+					{
+						long loop = out->length - out->intro_length;
+						if ( loop >= 2000 )
+							out->loop_length = loop;
+					}
+				}
+				break;
+			*/
+			
+			case 0x13:
+				// text goes after the space reserved for the year prefix
+				copyright_len = min( len, (int) sizeof copyright - year_len );
+				memcpy( &copyright [year_len], in, copyright_len );
+				break;
+			
+			default:
+				if ( id < 0x01 || (id > 0x07 && id < 0x10) ||
+						(id > 0x14 && id < 0x30) || id > 0x36 )
+					debug_printf( "Unknown SPC xid6 block: %X\n", (int) id );
+				break;
+		}
+		if ( field )
+		{
+			check( type == 1 );
+			Gme_File::copy_field_( field, (char const*) in, len );
+		}
+		
+		// skip to next block
+		in += len;
+		
+		// blocks are supposed to be 4-byte aligned with zero-padding...
+		byte const* unaligned = in;
+		while ( (in - begin) & 3 && in < end )
+		{
+			if ( *in++ != 0 )
+			{
+				// ...but some files have no padding
+				in = unaligned;
+				debug_printf( "SPC info tag wasn't properly padded to align\n" );
+				break;
+			}
+		}
+	}
+	
+	// Prepend the year as four decimal digits and a space, written backwards
+	// into the space reserved in front of the copyright text
+	char* p = &copyright [year_len];
+	if ( year )
+	{
+		*--p = ' ';
+		for ( int n = 4; n--; )
+		{
+			*--p = char (year % 10 + '0');
+			year /= 10;
+		}
+		copyright_len += year_len;
+	}
+	if ( copyright_len )
+		Gme_File::copy_field_( out->copyright, p, copyright_len );
+	
+	check( in == end );
+}
+
+// Fills out from the fixed SPC header h and, when xid6_size is non-zero,
+// from the xid6 tag data as well.
+static void get_spc_info( Spc_Emu::header_t const& h, byte const* xid6, long xid6_size,
+		track_info_t* out )
+{
+	// decode length (can be in text or binary format, sometimes ambiguous ugh)
+	long secs = 0;
+	for ( int pos = 0; pos < 3; ++pos )
+	{
+		unsigned const digit = h.len_secs [pos] - '0';
+		if ( digit <= 9 )
+		{
+			secs = secs * 10 + digit;
+			continue;
+		}
+		
+		// ignore single-digit text lengths
+		// (except if author field is present and begins at offset 1, ugh)
+		if ( pos == 1 && (h.author [0] || !h.author [1]) )
+			secs = 0;
+		break;
+	}
+	
+	// fall back to reading the field as a binary 16-bit value
+	bool const text_len_unusable = (secs == 0 || secs > 0x1FFF);
+	if ( text_len_unusable )
+		secs = get_le16( h.len_secs );
+	if ( secs < 0x1FFF )
+		out->length = secs * 1000;
+	
+	// skip the first author byte when it is below ' ' or a decimal digit
+	int const skip = (h.author [0] < ' ' || unsigned (h.author [0] - '0') <= 9);
+	Gme_File::copy_field_( out->author, &h.author [skip], sizeof h.author - skip );
+	
+	GME_COPY_FIELD( h, out, song );
+	GME_COPY_FIELD( h, out, game );
+	GME_COPY_FIELD( h, out, dumper );
+	GME_COPY_FIELD( h, out, comment );
+	
+	if ( xid6_size )
+		get_spc_xid6( xid6, xid6_size, out );
+}
+
+// Fills out from the loaded file's header and trailer. The track index is
+// unused.
+blargg_err_t Spc_Emu::track_info_( track_info_t* out, int ) const
+{
+	header_t const& hdr = header();
+	get_spc_info( hdr, trailer(), trailer_size(), out );
+	return 0;
+}
+
+// Returns 0 if header starts with the 27-byte SPC file signature, otherwise
+// gme_wrong_file_type.
+static blargg_err_t check_spc_header( void const* header )
+{
+	bool const matches = (memcmp( header, "SNES-SPC700 Sound File Data", 27 ) == 0);
+	if ( matches )
+		return 0;
+	return gme_wrong_file_type;
+}
+
+// Info-only reader for SPC files: keeps just the header and the xid6 tag data.
+struct Spc_File : Gme_Info_
+{
+	Spc_Emu::header_t header;
+	blargg_vector<byte> xid6;
+	
+	Spc_File() { set_type( gme_spc_type ); }
+	
+	// Reads the header, verifies the signature, then reads whatever follows
+	// offset 0x10200 into xid6
+	blargg_err_t load_( Data_Reader& in )
+	{
+		long const total = in.remain();
+		if ( total < Snes_Spc::spc_min_file_size )
+			return gme_wrong_file_type;
+		
+		RETURN_ERR( in.read( &header, Spc_Emu::header_size ) );
+		RETURN_ERR( check_spc_header( header.tag ) );
+		
+		long const xid6_offset = 0x10200;
+		long const tag_bytes = total - xid6_offset;
+		if ( tag_bytes <= 0 )
+			return 0; // nothing after the fixed-size part
+		
+		RETURN_ERR( xid6.resize( tag_bytes ) );
+		RETURN_ERR( in.skip( xid6_offset - Spc_Emu::header_size ) );
+		RETURN_ERR( in.read( xid6.begin(), xid6.size() ) );
+		return 0;
+	}
+	
+	blargg_err_t track_info_( track_info_t* out, int ) const
+	{
+		get_spc_info( header, xid6.begin(), xid6.size(), out );
+		return 0;
+	}
+};
+
+// Factory callbacks referenced by the type descriptor below: one creates the
+// full emulator, the other the info-only file reader.
+static Music_Emu* new_spc_emu () { return BLARGG_NEW Spc_Emu ; }
+static Music_Emu* new_spc_file() { return BLARGG_NEW Spc_File; }
+
+// Type descriptor for SPC files and the exported pointer to it
+static gme_type_t_ const gme_spc_type_ = { "Super Nintendo", 1, &new_spc_emu, &new_spc_file, "SPC", 0 };
+BLARGG_EXPORT extern gme_type_t const gme_spc_type = &gme_spc_type_;
+
+
+// Setup
+
+// Initializes the APU and, for any rate other than native_sample_rate, sets
+// up the resampler that converts from the native rate.
+blargg_err_t Spc_Emu::set_sample_rate_( long sample_rate )
+{
+	RETURN_ERR( apu.init() );
+	enable_accuracy( false );
+	
+	if ( sample_rate == native_sample_rate )
+		return 0; // no resampling needed
+	
+	RETURN_ERR( resampler.buffer_size( native_sample_rate / 20 * 2 ) );
+	resampler.time_ratio( (double) native_sample_rate / sample_rate, 0.9965 );
+	return 0;
+}
+
+// Forwards the accuracy setting to the base class and enables or disables
+// the output filter to match.
+void Spc_Emu::enable_accuracy_( bool b )
+{
+	Music_Emu::enable_accuracy_( b );
+	filter.enable( b );
+}
+
+// Forwards the voice mute mask to the base class and to the APU.
+void Spc_Emu::mute_voices_( int m )
+{
+	Music_Emu::mute_voices_( m );
+	apu.mute_voices( m );
+}
+
+// Remembers the in-memory file (the data is not copied) and checks that it is
+// large enough and carries the SPC signature.
+blargg_err_t Spc_Emu::load_mem_( byte const* in, long size )
+{
+	// header_t must cover exactly header_size bytes
+	assert( offsetof (header_t,unused2 [46]) == header_size );
+	
+	file_data = in;
+	file_size = size;
+	set_voice_count( Snes_Spc::voice_count );
+	
+	if ( size >= Snes_Spc::spc_min_file_size )
+		return check_spc_header( in );
+	return gme_wrong_file_type;
+}
+
+// Emulation
+
+// Converts the tempo multiplier t into the APU's integer tempo units.
+void Spc_Emu::set_tempo_( double t )
+{
+	int const apu_tempo = (int) (t * apu.tempo_unit);
+	apu.set_tempo( apu_tempo );
+}
+
+// Restarts playback: clears the resampler and filter, reloads the SPC image
+// into the APU, and applies a default fade when the file carries a length.
+blargg_err_t Spc_Emu::start_track_( int track )
+{
+	RETURN_ERR( Music_Emu::start_track_( track ) );
+	
+	resampler.clear();
+	filter.clear();
+	RETURN_ERR( apu.load_spc( file_data, file_size ) );
+	filter.set_gain( (int) (gain() * SPC_Filter::gain_unit) );
+	apu.clear_echo();
+	
+	track_info_t info;
+	RETURN_ERR( track_info_( &info, track ) );
+
+	// Set a default track length, need a non-zero fadeout
+	if ( autoload_playback_limit() )
+	{
+		if ( info.length > 0 )
+			set_fade ( info.length, 50 );
+	}
+	return 0;
+}
+
+// Has the APU generate count samples into out, then runs the output filter
+// over them in place. Returns the APU's error, if any, without filtering.
+blargg_err_t Spc_Emu::play_and_filter( long count, sample_t out [] )
+{
+	RETURN_ERR( apu.play( count, out ) );
+	filter.run( out, count );
+	return 0;
+}
+
+// Skips count output samples, then plays a short run of samples into a
+// scratch buffer so the resampler does not produce a pop.
+blargg_err_t Spc_Emu::skip_( long count )
+{
+	bool const resampling = (sample_rate() != native_sample_rate);
+	if ( resampling )
+	{
+		// scale by the resampler ratio, force the result even, then subtract
+		// whatever resampler.skip_input() reports
+		long const scaled = long (count * resampler.ratio()) & ~1;
+		count = scaled - resampler.skip_input( scaled );
+	}
+	
+	// TODO: shouldn't skip be adjusted for the 64 samples read afterwards?
+	
+	if ( count > 0 )
+	{
+		RETURN_ERR( apu.skip( count ) );
+		filter.clear();
+	}
+	
+	// eliminate pop due to resampler
+	enum { resampler_latency = 64 };
+	sample_t discard [resampler_latency];
+	return play_( resampler_latency, discard );
+}
+
+// Generates count samples into out. At the native rate the APU output is
+// used directly; otherwise the resampler is drained and refilled until
+// enough samples have been produced.
+blargg_err_t Spc_Emu::play_( long count, sample_t* out )
+{
+	if ( sample_rate() == native_sample_rate )
+		return play_and_filter( count, out );
+	
+	long done = 0;
+	while ( done < count )
+	{
+		// take what the resampler already has...
+		done += resampler.read( &out [done], count - done );
+		if ( done < count )
+		{
+			// ...and give it more native-rate input if that wasn't enough
+			long const n = resampler.max_write();
+			RETURN_ERR( play_and_filter( n, resampler.buffer() ) );
+			resampler.write( n );
+		}
+	}
+	check( done == count );
+	return 0;
+}
diff --git a/libraries/game-music-emu/gme/Spc_Emu.h b/libraries/game-music-emu/gme/Spc_Emu.h
new file mode 100644
index 000000000..76e1ac63d
--- /dev/null
+++ b/libraries/game-music-emu/gme/Spc_Emu.h
@@ -0,0 +1,82 @@
+// Super Nintendo SPC music file emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef SPC_EMU_H
+#define SPC_EMU_H
+
+#include "Fir_Resampler.h"
+#include "Music_Emu.h"
+#include "Snes_Spc.h"
+#include "Spc_Filter.h"
+
+class Spc_Emu : public Music_Emu {
+public:
+	// The Super Nintendo hardware samples at 32kHz. Other sample rates are
+	// handled by resampling the 32kHz output; emulation accuracy is not affected.
+	enum { native_sample_rate = 32000 };
+	
+	// SPC file header
+	enum { header_size = 0x100 };
+	struct header_t
+	{
+		char tag [35];
+		byte format;
+		byte version;
+		byte pc [2];
+		byte a, x, y, psw, sp;
+		byte unused [2];
+		char song [32];
+		char game [32];
+		char dumper [16];
+		char comment [32];
+		byte date [11];
+		byte len_secs [3];
+		byte fade_msec [4];
+		char author [32]; // sometimes first char should be skipped (see official SPC spec)
+		byte mute_mask;
+		byte emulator;
+		byte unused2 [46];
+	};
+	
+	// Header for currently loaded file
+	header_t const& header() const { return *(header_t const*) file_data; }
+	
+	// Prevents channels and global volumes from being phase-negated
+	void disable_surround( bool disable = true );
+	
+	static gme_type_t static_type() { return gme_spc_type; }
+	
+public:
+	// deprecated
+	using Music_Emu::load;
+	blargg_err_t load( header_t const& h, Data_Reader& in ) // use Remaining_Reader
+			{ return load_remaining_( &h, sizeof h, in ); }
+	byte const* trailer() const; // use track_info()
+	long trailer_size() const;
+
+public:
+	Spc_Emu();
+	~Spc_Emu();
+protected:
+	blargg_err_t load_mem_( byte const*, long );
+	blargg_err_t track_info_( track_info_t*, int track ) const;
+	blargg_err_t set_sample_rate_( long );
+	blargg_err_t start_track_( int );
+	blargg_err_t play_( long, sample_t* );
+	blargg_err_t skip_( long );
+	void mute_voices_( int );
+	void set_tempo_( double );
+	void enable_accuracy_( bool );
+private:
+	byte const* file_data;
+	long        file_size;
+	Fir_Resampler<24> resampler;
+	SPC_Filter filter;
+	Snes_Spc apu;
+	
+	blargg_err_t play_and_filter( long count, sample_t out [] );
+};
+
+inline void Spc_Emu::disable_surround( bool b ) { apu.disable_surround( b ); }
+
+#endif
diff --git a/libraries/game-music-emu/gme/Spc_Filter.cpp b/libraries/game-music-emu/gme/Spc_Filter.cpp
new file mode 100644
index 000000000..2cc77fc93
--- /dev/null
+++ b/libraries/game-music-emu/gme/Spc_Filter.cpp
@@ -0,0 +1,83 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Spc_Filter.h"
+
+#include <string.h>
+
+/* Copyright (C) 2007 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+void SPC_Filter::clear() { memset( ch, 0, sizeof ch ); } // zeroes p1, pp1 and sum of both channels
+
+SPC_Filter::SPC_Filter() :
+	gain( gain_unit ),
+	bass( bass_norm ),
+	enabled( true )
+{
+	clear(); // channel history starts at zero
+}
+
+void SPC_Filter::run( short* io, int count )
+{
+	require( (count & 1) == 0 ); // must be even
+	
+	int const gain = this->gain;
+	if ( enabled )
+	{
+		int const bass = this->bass;
+		chan_t* c = &ch [2]; // one past the last channel; pre-decremented below
+		do
+		{
+			// cache in registers
+			int sum = (--c)->sum; // first pass: ch [1] on even-indexed samples; second pass: ch [0] on odd-indexed ones
+			int pp1 = c->pp1;
+			int p1  = c->p1;
+			
+			for ( int i = 0; i < count; i += 2 ) // step 2: every other sample belongs to this pass
+			{
+				// Low-pass filter (two point FIR with coeffs 0.25, 0.75)
+				int f = io [i] + p1; // current sample + 3 * previous sample, i.e. 4x the filtered value
+				p1 = io [i] * 3;
+				
+				// High-pass filter ("leaky integrator")
+				int delta = f - pp1;
+				pp1 = f;
+				int s = sum >> (gain_bits + 2); // output comes from sum before this step's update; the +2 removes the 4x above
+				sum += (delta * gain) - (sum >> bass); // a larger bass value subtracts a smaller share of sum per step
+				
+				// Clamp to 16 bits
+				if ( (short) s != s )
+					s = (s >> 31) ^ 0x7FFF; // 0x7FFF for positive s, -0x8000 for negative (assumes 32-bit int, arithmetic >>)
+				
+				io [i] = (short) s;
+			}
+			
+			c->p1  = p1;
+			c->pp1 = pp1;
+			c->sum = sum;
+			++io; // shift to the other half of the interleaved samples
+		}
+		while ( c != ch );
+	}
+	else if ( gain != gain_unit ) // filtering off: gain only, and nothing at all at unity gain
+	{
+		short* const end = io + count;
+		while ( io < end )
+		{
+			int s = (*io * gain) >> gain_bits;
+			if ( (short) s != s )
+				s = (s >> 31) ^ 0x7FFF; // same saturation as in the filtered path
+			*io++ = (short) s;
+		}
+	}
+}
diff --git a/libraries/game-music-emu/gme/Spc_Filter.h b/libraries/game-music-emu/gme/Spc_Filter.h
new file mode 100644
index 000000000..d9994af5f
--- /dev/null
+++ b/libraries/game-music-emu/gme/Spc_Filter.h
@@ -0,0 +1,53 @@
+// Simple low-pass and high-pass filter to better match sound output of a SNES
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef SPC_FILTER_H
+#define SPC_FILTER_H
+
+#include "blargg_common.h"
+
+struct SPC_Filter {
+public:
+	
+	// Filters count samples of stereo sound in place. Count must be a multiple of 2.
+	typedef short sample_t;
+	void run( sample_t* io, int count );
+	
+// Optional features
+
+	// Clears filter to silence
+	void clear();
+	
+	// Sets gain (volume), where gain_unit is normal. Gains greater than gain_unit
+	// are fine, since output is clamped to 16-bit sample range.
+	enum { gain_unit = 0x100 };
+	void set_gain( int gain );
+	
+	// Enables/disables filtering (when disabled, gain is still applied)
+	void enable( bool b );
+	
+	// Sets amount of bass (logarithmic scale)
+	enum { bass_none =  0 };
+	enum { bass_norm =  8 }; // normal amount
+	enum { bass_max  = 31 };
+	void set_bass( int bass );
+	
+public:
+	SPC_Filter();
+	BLARGG_DISABLE_NOTHROW
+private:
+	enum { gain_bits = 8 };
+	int gain;
+	int bass;
+	bool enabled;
+	struct chan_t { int p1, pp1, sum; };
+	chan_t ch [2];
+};
+
+inline void SPC_Filter::enable( bool b )  { enabled = b; }
+
+inline void SPC_Filter::set_gain( int g ) { gain = g; }
+
+inline void SPC_Filter::set_bass( int b ) { bass = b; }
+
+#endif
diff --git a/libraries/game-music-emu/gme/Vgm_Emu.cpp b/libraries/game-music-emu/gme/Vgm_Emu.cpp
new file mode 100644
index 000000000..8f19b7de5
--- /dev/null
+++ b/libraries/game-music-emu/gme/Vgm_Emu.cpp
@@ -0,0 +1,434 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Vgm_Emu.h"
+
+#include "blargg_endian.h"
+#include <string.h>
+#include <math.h>
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+double const fm_gain = 3.0; // FM emulators are internally quieter to avoid 16-bit overflow
+double const rolloff = 0.990;
+double const oversample_factor = 1.5;
+
+Vgm_Emu::Vgm_Emu()
+{
+	disable_oversampling_ = false;
+	psg_rate   = 0; // set_tempo_() does nothing while this is zero; load_mem_() gives it a non-zero value
+	set_type( gme_vgm_type );
+	
+	static int const types [8] = { // only four initializers; the remaining entries are zero
+		wave_type | 1, wave_type | 0, wave_type | 2, noise_type | 0
+	};
+	set_voice_types( types );
+	
+	set_silence_lookahead( 1 ); // tracks should already be trimmed
+	
+	set_equalizer( make_equalizer( -14.0, 80 ) );
+}
+
+Vgm_Emu::~Vgm_Emu() { }
+
+// Track info
+
+static byte const* skip_gd3_str( byte const* in, byte const* end )
+{
+	// Walk two-byte units; stop just past the first unit whose bytes are both zero
+	for ( ; end - in >= 2; in += 2 )
+	{
+		if ( in [0] == 0 && in [1] == 0 )
+			return in + 2;
+	}
+	return in;
+}
+
+static byte const* get_gd3_str( byte const* in, byte const* end, char* field )
+{
+	byte const* mid = skip_gd3_str( in, end ); // position just past this string
+	int len = (mid - in) / 2 - 1; // two-byte units consumed, minus one for the terminator
+	if ( len > 0 ) // otherwise field is left untouched
+	{
+		len = min( len, (int) Gme_File::max_field_ ); // assumes field holds at least max_field_ + 1 chars - confirm
+		field [len] = 0;
+		for ( int i = 0; i < len; i++ )
+			field [i] = (in [i * 2 + 1] ? '?' : in [i * 2]); // '?' when the unit's second byte is non-zero; TODO: convert to utf-8
+	}
+	return mid;
+}
+
+static byte const* get_gd3_pair( byte const* in, byte const* end, char* field )
+{
+	return skip_gd3_str( get_gd3_str( in, end, field ), end ); // store the first string in field, step over the one after it
+}
+
+static void parse_gd3( byte const* in, byte const* end, track_info_t* out )
+{
+	in = get_gd3_pair( in, end, out->song );      // first string of each pair is kept, second skipped
+	in = get_gd3_pair( in, end, out->game );
+	in = get_gd3_pair( in, end, out->system );
+	in = get_gd3_pair( in, end, out->author );
+	in = get_gd3_str ( in, end, out->copyright ); // release date: a single string in GD3
+	in = get_gd3_str ( in, end, out->dumper );    // also a single string; reading a pair here swallowed the notes
+	in = get_gd3_str ( in, end, out->comment );   // notes: last string of the tag
+}
+
+int const gd3_header_size = 12;
+
+static long check_gd3_header( byte const* h, long remain )
+{
+	// Returns 0 unless a complete header with the "Gd3 " tag is present
+	if ( remain < gd3_header_size || memcmp( h, "Gd3 ", 4 ) != 0 )
+		return 0;
+	if ( get_le32( h + 4 ) >= 0x200 )
+		return 0;
+	
+	long const payload = get_le32( h + 8 );
+	return (payload > remain - gd3_header_size) ? 0 : payload;
+}
+
+byte const* Vgm_Emu::gd3_data( int* size ) const
+{
+	if ( size )
+		*size = 0; // value reported when no usable GD3 block is found
+	
+	long gd3_offset = get_le32( header().gd3_offset ) - 0x2C; // rebase from the field's own position (0x14) to the end of the 0x40-byte header
+	if ( gd3_offset < 0 )
+		return 0;
+	
+	byte const* gd3 = data + header_size + gd3_offset;
+	long gd3_size = check_gd3_header( gd3, data_end - gd3 ); // 0 if the block is malformed or does not fit before data_end
+	if ( !gd3_size )
+		return 0;
+	
+	if ( size )
+		*size = gd3_size + gd3_header_size; // total size, GD3 header included
+	
+	return gd3;
+}
+
+static void get_vgm_length( Vgm_Emu::header_t const& h, track_info_t* out )
+{
+	long length = get_le32( h.track_duration ) * 10 / 441; // x * 10 / 441 == x * 1000 / 44100: 44100 Hz samples to msec
+	if ( length > 0 ) // otherwise out is left untouched
+	{
+		long loop = get_le32( h.loop_duration );
+		if ( loop > 0 && get_le32( h.loop_offset ) )
+		{
+			out->loop_length = loop * 10 / 441; // looped track: out->length is not written in this branch
+			out->intro_length = length - out->loop_length;
+		}
+		else
+		{
+			out->length = length; // 1000 / 44100 (VGM files used 44100 as timebase)
+			out->intro_length = length; // make it clear that track is no longer than length
+			out->loop_length = 0;
+		}
+	}
+}
+
+blargg_err_t Vgm_Emu::track_info_( track_info_t* out, int ) const
+{
+	get_vgm_length( header(), out );
+	
+	int size;
+	byte const* gd3 = gd3_data( &size ); // null when the file has no usable GD3 block
+	if ( gd3 )
+		parse_gd3( gd3 + gd3_header_size, gd3 + size, out ); // strings run from just after the GD3 header to the block's end
+	
+	return 0;
+}
+
+static blargg_err_t check_vgm_header( Vgm_Emu::header_t const& h )
+{
+	// Only the four-byte "Vgm " signature is examined
+	bool const tag_ok = (memcmp( h.tag, "Vgm ", 4 ) == 0);
+	return tag_ok ? 0 : gme_wrong_file_type;
+}
+
+struct Vgm_File : Gme_Info_
+{
+	Vgm_Emu::header_t h;
+	blargg_vector<byte> gd3; // GD3 payload without its 12-byte header; stays empty when none was read
+	
+	Vgm_File() { set_type( gme_vgm_type ); }
+	
+	blargg_err_t load_( Data_Reader& in )
+	{
+		long file_size = in.remain();
+		if ( file_size <= Vgm_Emu::header_size )
+			return gme_wrong_file_type;
+		
+		RETURN_ERR( in.read( &h, Vgm_Emu::header_size ) );
+		RETURN_ERR( check_vgm_header( h ) );
+		
+		long gd3_offset = get_le32( h.gd3_offset ) - 0x2C; // now measured from the end of the header
+		long remain = file_size - Vgm_Emu::header_size - gd3_offset; // bytes from the GD3 header to the end of the file
+		byte gd3_h [gd3_header_size];
+		if ( gd3_offset > 0 && remain >= gd3_header_size ) // NOTE(review): Vgm_Emu::gd3_data() also accepts an offset of 0
+		{
+			RETURN_ERR( in.skip( gd3_offset ) ); // everything between header and GD3 block is skipped, not stored
+			RETURN_ERR( in.read( gd3_h, sizeof gd3_h ) );
+			long gd3_size = check_gd3_header( gd3_h, remain );
+			if ( gd3_size )
+			{
+				RETURN_ERR( gd3.resize( gd3_size ) );
+				RETURN_ERR( in.read( gd3.begin(), gd3.size() ) );
+			}
+		}
+		return 0;
+	}
+	
+	blargg_err_t track_info_( track_info_t* out, int ) const
+	{
+		get_vgm_length( h, out );
+		if ( gd3.size() )
+			parse_gd3( gd3.begin(), gd3.end(), out ); // payload only, so no header offset is needed here
+		return 0;
+	}
+};
+
+static Music_Emu* new_vgm_emu () { return BLARGG_NEW Vgm_Emu ; }
+static Music_Emu* new_vgm_file() { return BLARGG_NEW Vgm_File; }
+
+static gme_type_t_ const gme_vgm_type_ = { "Sega SMS/Genesis", 1, &new_vgm_emu, &new_vgm_file, "VGM", 1 };
+BLARGG_EXPORT extern gme_type_t const gme_vgm_type = &gme_vgm_type_;
+
+static gme_type_t_ const gme_vgz_type_ = { "Sega SMS/Genesis", 1, &new_vgm_emu, &new_vgm_file, "VGZ", 1 };
+BLARGG_EXPORT extern gme_type_t const gme_vgz_type = &gme_vgz_type_;
+
+
+// Setup
+
+void Vgm_Emu::set_tempo_( double t )
+{
+	if ( psg_rate ) // still zero before load_mem_() has run
+	{
+		vgm_rate = (long) (44100 * t + 0.5); // VGM time units per second at tempo t, rounded
+		blip_time_factor = (long) floor( double (1L << blip_time_bits) / vgm_rate * psg_rate + 0.5 ); // psg_rate / vgm_rate, scaled by 2^blip_time_bits
+		//debug_printf( "blip_time_factor: %ld\n", blip_time_factor );
+		//debug_printf( "vgm_rate: %ld\n", vgm_rate );
+		// TODO: remove? calculates vgm_rate more accurately (above differs at most by one Hz only)
+		//blip_time_factor = (long) floor( double (1L << blip_time_bits) * psg_rate / 44100 / t + 0.5 );
+		//vgm_rate = (long) floor( double (1L << blip_time_bits) * psg_rate / blip_time_factor + 0.5 );
+		
+		fm_time_factor = 2 + (long) floor( fm_rate * (1L << fm_time_bits) / vgm_rate + 0.5 ); // fm_rate / vgm_rate, scaled by 2^fm_time_bits; NOTE(review): reason for the +2 is not evident here
+	}
+}
+
+blargg_err_t Vgm_Emu::set_sample_rate_( long sample_rate )
+{
+	RETURN_ERR( blip_buf.set_sample_rate( sample_rate, 1000 / 30 ) ); // 1000 / 30 is 33 (integer division); presumably a buffer length in msec - confirm
+	return Classic_Emu::set_sample_rate_( sample_rate );
+}
+
+blargg_err_t Vgm_Emu::set_multi_channel ( bool is_enabled )
+{
+	// we actually should check here whether this is classic emu or not
+	// however set_multi_channel() is called before setup_fm(), so is_classic_emu() is not yet initialized
+	// hard code it to unsupported
+#if 0
+	if ( is_classic_emu() )
+	{
+		RETURN_ERR( Music_Emu::set_multi_channel_( is_enabled ) );
+		return 0;
+	}
+	else
+#endif
+	{
+		(void) is_enabled;
+		return "multichannel rendering not supported for YM2*** FM sound chip emulators";
+	}
+}
+
+void Vgm_Emu::update_eq( blip_eq_t const& eq )
+{
+	psg.treble_eq( eq );
+	dac_synth.treble_eq( eq );
+}
+
+void Vgm_Emu::set_voice( int i, Blip_Buffer* c, Blip_Buffer* l, Blip_Buffer* r )
+{
+	if ( i < psg.osc_count ) // only PSG oscillators are routed here; higher voice indices are ignored
+		psg.osc_output( i, c, l, r );
+}
+
+void Vgm_Emu::mute_voices_( int mask )
+{
+	Classic_Emu::mute_voices_( mask );
+	dac_synth.output( &blip_buf );
+	if ( uses_fm )
+	{
+		psg.output( (mask & 0x80) ? 0 : &blip_buf ); // bit 7 is the voice named "PSG" in load_mem_()
+		if ( ym2612.enabled() )
+		{
+			dac_synth.volume( (mask & 0x40) ? 0.0 : 0.1115 / 256 * fm_gain * gain() ); // bit 6 is the voice named "PCM"
+			ym2612.mute_voices( mask );
+		}
+		
+		if ( ym2413.enabled() )
+		{
+			int m = mask & 0x3F; // the low six voice bits pass through unchanged
+			if ( mask & 0x20 )
+				m |= 0x01E0; // channels 5-8
+			if ( mask & 0x40 )
+				m |= 0x3E00; // bits 9-13 of the YM2413 mask
+			ym2413.mute_voices( m );
+		}
+	}
+}
+
+blargg_err_t Vgm_Emu::load_mem_( byte const* new_data, long new_size )
+{
+	assert( offsetof (header_t,unused2 [8]) == header_size ); // last member of header_t must end exactly at header_size
+	
+	if ( new_size <= header_size )
+		return gme_wrong_file_type;
+	
+	header_t const& h = *(header_t const*) new_data;
+	
+	RETURN_ERR( check_vgm_header( h ) );
+	
+	check( get_le32( h.version ) <= 0x150 );
+	
+	// psg rate
+	psg_rate = get_le32( h.psg_rate );
+	if ( !psg_rate )
+		psg_rate = 3579545; // fallback used when the header field is zero
+	blip_buf.clock_rate( psg_rate );
+	
+	data     = new_data; // pointer is stored, not copied: new_data must outlive playback
+	data_end = new_data + new_size;
+	
+	// get loop
+	loop_begin = data_end; // no loop: run_commands() then jumps to data_end on the end command
+	if ( get_le32( h.loop_offset ) )
+		loop_begin = &data [get_le32( h.loop_offset ) + offsetof (header_t,loop_offset)]; // offset is relative to the field; NOTE(review): not checked against data_end
+	
+	set_voice_count( psg.osc_count );
+	
+	RETURN_ERR( setup_fm() ); // raises the voice count to 8 when an FM chip is used
+	
+	static const char* const fm_names [] = {
+		"FM 1", "FM 2", "FM 3", "FM 4", "FM 5", "FM 6", "PCM", "PSG"
+	};
+	static const char* const psg_names [] = { "Square 1", "Square 2", "Square 3", "Noise" };
+	set_voice_names( uses_fm ? fm_names : psg_names );
+	
+	// do after FM in case output buffer is changed
+	return Classic_Emu::setup_buffer( psg_rate );
+}
+
+blargg_err_t Vgm_Emu::setup_fm()
+{
+	long ym2612_rate = get_le32( header().ym2612_rate );
+	long ym2413_rate = get_le32( header().ym2413_rate );
+	if ( ym2413_rate && get_le32( header().version ) < 0x110 )
+		update_fm_rates( &ym2413_rate, &ym2612_rate ); // pre-1.10 files: decide the chip from the command stream
+	
+	uses_fm = false;
+	
+	fm_rate = blip_buf.sample_rate() * oversample_factor; // default: 1.5x the output rate
+	
+	if ( ym2612_rate )
+	{
+		uses_fm = true;
+		if ( disable_oversampling_ )
+			fm_rate = ym2612_rate / 144.0; // rate derived from the chip clock instead of the output rate
+		Dual_Resampler::setup( fm_rate / blip_buf.sample_rate(), rolloff, fm_gain * gain() );
+		RETURN_ERR( ym2612.set_rate( fm_rate, ym2612_rate ) );
+		ym2612.enable( true );
+		set_voice_count( 8 );
+	}
+	
+	if ( !uses_fm && ym2413_rate ) // the YM2612 wins when both rates are non-zero
+	{
+		uses_fm = true;
+		if ( disable_oversampling_ )
+			fm_rate = ym2413_rate / 72.0;
+		Dual_Resampler::setup( fm_rate / blip_buf.sample_rate(), rolloff, fm_gain * gain() );
+		int result = ym2413.set_rate( fm_rate, ym2413_rate );
+		if ( result == 2 ) // the stub Ym2413_Emu always returns 2
+			return "YM2413 FM sound isn't supported";
+		CHECK_ALLOC( !result );
+		ym2413.enable( true );
+		set_voice_count( 8 );
+	}
+	
+	if ( uses_fm )
+	{
+		RETURN_ERR( Dual_Resampler::reset( blip_buf.length() * blip_buf.sample_rate() / 1000 ) );
+		psg.volume( 0.135 * fm_gain * gain() ); // PSG is scaled down when mixed with FM
+	}
+	else
+	{
+		ym2612.enable( false ); // NOTE(review): chips are only disabled on this PSG-only path
+		ym2413.enable( false );
+		psg.volume( gain() );
+	}
+	
+	return 0;
+}
+
+// Emulation
+
+blargg_err_t Vgm_Emu::start_track_( int track )
+{
+	RETURN_ERR( Classic_Emu::start_track_( track ) );
+	psg.reset( get_le16( header().noise_feedback ), header().noise_width );
+	
+	dac_disabled = -1;
+	pos          = data + header_size; // default start of the command stream
+	pcm_data     = pos;
+	pcm_pos      = pos;
+	dac_amp      = -1; // negative: write_pcm() emits no delta for the first sample
+	vgm_time     = 0;
+	if ( get_le32( header().version ) >= 0x150 )
+	{
+		long data_offset = get_le32( header().data_offset );
+		check( data_offset );
+		if ( data_offset )
+			pos += data_offset + offsetof (header_t,data_offset) - 0x40; // offset is relative to the field; NOTE(review): not checked against data_end
+	}
+	
+	if ( uses_fm )
+	{
+		if ( ym2413.enabled() )
+			ym2413.reset();
+		
+		if ( ym2612.enabled() )
+			ym2612.reset();
+		
+		fm_time_offset = 0; // discard any fractional FM time carried over by play_frame()
+		blip_buf.clear();
+		Dual_Resampler::clear();
+	}
+	return 0;
+}
+
+blargg_err_t Vgm_Emu::run_clocks( blip_time_t& time_io, int msec )
+{
+	time_io = run_commands( msec * vgm_rate / 1000 ); // msec converted to VGM time units; result is the matching blip time
+	psg.end_frame( time_io );
+	return 0;
+}
+
+blargg_err_t Vgm_Emu::play_( long count, sample_t* out )
+{
+	if ( !uses_fm ) // PSG-only files take the Classic_Emu path
+		return Classic_Emu::play_( count, out );
+		
+	Dual_Resampler::dual_play( count, out, blip_buf ); // FM files go through the dual resampler with blip_buf
+	return 0;
+}
diff --git a/libraries/game-music-emu/gme/Vgm_Emu.h b/libraries/game-music-emu/gme/Vgm_Emu.h
new file mode 100644
index 000000000..40cfb7102
--- /dev/null
+++ b/libraries/game-music-emu/gme/Vgm_Emu.h
@@ -0,0 +1,86 @@
+// Sega Master System/Mark III, Sega Genesis/Mega Drive, BBC Micro VGM music file emulator
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef VGM_EMU_H
+#define VGM_EMU_H
+
+#include "Vgm_Emu_Impl.h"
+
+// Emulates VGM music using SN76489/SN76496 PSG, YM2612, and YM2413 FM sound chips.
+// Supports custom sound buffer and frequency equalization when VGM uses just the PSG.
+// FM sound chips can be run at their proper rates, or slightly higher to reduce
+// aliasing on high notes. Currently YM2413 support requires that you supply a
+// YM2413 sound chip emulator. I can provide one I've modified to work with the library.
+class Vgm_Emu : public Vgm_Emu_Impl {
+public:
+	// True if custom buffer and custom equalization are supported
+	// TODO: move into Music_Emu and rename to something like supports_custom_buffer()
+	bool is_classic_emu() const { return !uses_fm; }
+	
+	blargg_err_t set_multi_channel ( bool is_enabled ) override;
+	
+	// Disable running FM chips at higher than normal rate. Will result in slightly
+	// more aliasing of high notes.
+	void disable_oversampling( bool disable = true ) { disable_oversampling_ = disable; }
+	
+	// VGM header format
+	enum { header_size = 0x40 };
+	struct header_t
+	{
+		char tag [4];
+		byte data_size [4];
+		byte version [4];
+		byte psg_rate [4];
+		byte ym2413_rate [4];
+		byte gd3_offset [4];
+		byte track_duration [4];
+		byte loop_offset [4];
+		byte loop_duration [4];
+		byte frame_rate [4];
+		byte noise_feedback [2];
+		byte noise_width;
+		byte unused1;
+		byte ym2612_rate [4];
+		byte ym2151_rate [4];
+		byte data_offset [4];
+		byte unused2 [8];
+	};
+	
+	// Header for currently loaded file
+	header_t const& header() const { return *(header_t const*) data; }
+	
+	static gme_type_t static_type() { return gme_vgm_type; }
+	
+public:
+	// deprecated
+	using Music_Emu::load;
+	blargg_err_t load( header_t const& h, Data_Reader& in ) // use Remaining_Reader
+			{ return load_remaining_( &h, sizeof h, in ); }
+	byte const* gd3_data( int* size_out = 0 ) const; // use track_info()
+
+public:
+	Vgm_Emu();
+	~Vgm_Emu();
+protected:
+	blargg_err_t track_info_( track_info_t*, int track ) const;
+	blargg_err_t load_mem_( byte const*, long );
+	blargg_err_t set_sample_rate_( long sample_rate );
+	blargg_err_t start_track_( int );
+	blargg_err_t play_( long count, sample_t* );
+	blargg_err_t run_clocks( blip_time_t&, int );
+	void set_tempo_( double );
+	void mute_voices_( int mask );
+	void set_voice( int, Blip_Buffer*, Blip_Buffer*, Blip_Buffer* );
+	void update_eq( blip_eq_t const& );
+private:
+	// removed; use disable_oversampling() and set_tempo() instead
+	Vgm_Emu( bool oversample, double tempo = 1.0 );
+	double fm_rate;
+	long psg_rate;
+	long vgm_rate;
+	bool disable_oversampling_;
+	bool uses_fm;
+	blargg_err_t setup_fm();
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Vgm_Emu_Impl.cpp b/libraries/game-music-emu/gme/Vgm_Emu_Impl.cpp
new file mode 100644
index 000000000..0d400254d
--- /dev/null
+++ b/libraries/game-music-emu/gme/Vgm_Emu_Impl.cpp
@@ -0,0 +1,314 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Vgm_Emu.h"
+
+#include <math.h>
+#include <string.h>
+#include "blargg_endian.h"
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+enum {
+	cmd_gg_stereo       = 0x4F,
+	cmd_psg             = 0x50,
+	cmd_ym2413          = 0x51,
+	cmd_ym2612_port0    = 0x52,
+	cmd_ym2612_port1    = 0x53,
+	cmd_ym2151          = 0x54,
+	cmd_delay           = 0x61,
+	cmd_delay_735       = 0x62,
+	cmd_delay_882       = 0x63,
+	cmd_byte_delay      = 0x64,
+	cmd_end             = 0x66,
+	cmd_data_block      = 0x67,
+	cmd_short_delay     = 0x70,
+	cmd_pcm_delay       = 0x80,
+	cmd_pcm_seek        = 0xE0,
+	
+	pcm_block_type      = 0x00,
+	ym2612_dac_port     = 0x2A
+};
+
+inline int command_len( int command )
+{
+	// Length in bytes, opcode included, decided by the bits above the low nibble
+	int const group = command >> 4;
+	
+	// 0x30-0x4F
+	if ( group == 0x03 || group == 0x04 )
+		return 2;
+	
+	// 0x50-0x5F and 0xA0-0xBF
+	if ( group == 0x05 || group == 0x0A || group == 0x0B )
+		return 3;
+	
+	// 0xC0-0xDF
+	if ( group == 0x0C || group == 0x0D )
+		return 4;
+	
+	// 0xE0-0xFF
+	if ( group == 0x0E || group == 0x0F )
+		return 5;
+	
+	// Any other opcode is unexpected here; callers get a length of 1
+	check( false );
+	return 1;
+}
+
+template<class Emu>
+inline void Ym_Emu<Emu>::begin_frame( short* p )
+{
+	require( enabled() );
+	out = p;
+	last_time = 0;
+}
+
+template<class Emu>
+inline int Ym_Emu<Emu>::run_until( int time )
+{
+	int count = time - last_time; // samples still to generate to reach time
+	if ( count > 0 )
+	{
+		if ( last_time < 0 ) // negative last_time (disabled_time): report failure so callers skip the write
+			return false;
+		last_time = time;
+		short* p = out;
+		out += count * Emu::out_chan_count; // move the write cursor past the samples generated below
+		Emu::run( count, p );
+	}
+	return true;
+}
+	
+inline Vgm_Emu_Impl::fm_time_t Vgm_Emu_Impl::to_fm_time( vgm_time_t t ) const
+{
+	return (t * fm_time_factor + fm_time_offset) >> fm_time_bits;
+}
+
+inline blip_time_t Vgm_Emu_Impl::to_blip_time( vgm_time_t t ) const
+{
+	return (t * blip_time_factor) >> blip_time_bits;
+}
+
+void Vgm_Emu_Impl::write_pcm( vgm_time_t vgm_time, int amp )
+{
+	blip_time_t blip_time = to_blip_time( vgm_time );
+	int old = dac_amp;
+	int delta = amp - old;
+	dac_amp = amp;
+	if ( old >= 0 ) // a previous amplitude exists: emit the step from it
+		dac_synth.offset_inline( blip_time, delta, &blip_buf );
+	else
+		dac_amp |= dac_disabled; // no step emitted; dac_amp becomes -1 again while dac_disabled is -1
+}
+
+blip_time_t Vgm_Emu_Impl::run_commands( vgm_time_t end_time )
+{
+	vgm_time_t vgm_time = this->vgm_time; // work on local copies; both are stored back at the end
+	byte const* pos = this->pos;
+	if ( pos >= data_end )
+	{
+		set_track_ended();
+		if ( pos > data_end ) // position ran past the data instead of landing exactly on data_end
+			set_warning( "Stream lacked end event" );
+	}
+	
+	while ( vgm_time < end_time && pos < data_end )
+	{
+		// TODO: be sure there are enough bytes left in stream for particular command
+		// so we don't read past end
+		switch ( *pos++ )
+		{
+		case cmd_end:
+			pos = loop_begin; // if not looped, loop_begin == data_end
+			break;
+		
+		case cmd_delay_735:
+			vgm_time += 735;
+			break;
+		
+		case cmd_delay_882:
+			vgm_time += 882;
+			break;
+		
+		case cmd_gg_stereo:
+			psg.write_ggstereo( to_blip_time( vgm_time ), *pos++ );
+			break;
+		
+		case cmd_psg:
+			psg.write_data( to_blip_time( vgm_time ), *pos++ );
+			break;
+		
+		case cmd_delay:
+			vgm_time += pos [1] * 0x100L + pos [0]; // 16-bit little-endian delay
+			pos += 2;
+			break;
+		
+		case cmd_byte_delay:
+			vgm_time += *pos++;
+			break;
+		
+		case cmd_ym2413:
+			if ( ym2413.run_until( to_fm_time( vgm_time ) ) ) // false when the chip is disabled: write is dropped
+				ym2413.write( pos [0], pos [1] );
+			pos += 2;
+			break;
+		
+		case cmd_ym2612_port0:
+			if ( pos [0] == ym2612_dac_port ) // DAC data is fed to dac_synth via write_pcm(), not to the FM core
+			{
+				write_pcm( vgm_time, pos [1] );
+			}
+			else if ( ym2612.run_until( to_fm_time( vgm_time ) ) )
+			{
+				if ( pos [0] == 0x2B )
+				{
+					dac_disabled = (pos [1] >> 7 & 1) - 1; // bit 7 set -> 0, clear -> -1 (disabled)
+					dac_amp |= dac_disabled;
+				}
+				ym2612.write0( pos [0], pos [1] );
+			}
+			pos += 2;
+			break;
+		
+		case cmd_ym2612_port1:
+			if ( ym2612.run_until( to_fm_time( vgm_time ) ) )
+				ym2612.write1( pos [0], pos [1] );
+			pos += 2;
+			break;
+			
+		case cmd_data_block: {
+			check( *pos == cmd_end ); // the byte after the opcode is expected to equal cmd_end (0x66)
+			int type = pos [1];
+			long size = get_le32( pos + 2 );
+			pos += 6;
+			if ( type == pcm_block_type )
+				pcm_data = pos; // remember where this block's PCM bytes start
+			pos += size; // NOTE(review): size is not validated against data_end
+			break;
+		}
+		
+		case cmd_pcm_seek:
+			pcm_pos = pcm_data + pos [3] * 0x1000000L + pos [2] * 0x10000L +
+					pos [1] * 0x100L + pos [0]; // 32-bit little-endian offset from pcm_data; not range-checked
+			pos += 4;
+			break;
+		
+		default:
+			int cmd = pos [-1]; // opcode just consumed by the switch
+			switch ( cmd & 0xF0 )
+			{
+				case cmd_pcm_delay:
+					write_pcm( vgm_time, *pcm_pos++ ); // 0x8n: output the next PCM byte, then wait n
+					vgm_time += cmd & 0x0F;
+					break;
+				
+				case cmd_short_delay:
+					vgm_time += (cmd & 0x0F) + 1; // 0x7n: wait n + 1
+					break;
+				
+				case 0x50:
+					pos += 2; // remaining 0x5n opcodes: skip their two operand bytes
+					break;
+				
+				default:
+					pos += command_len( cmd ) - 1; // skip operands; the opcode itself is already consumed
+					set_warning( "Unknown stream event" );
+			}
+		}
+	}
+	vgm_time -= end_time; // keep any overshoot so the next call starts from it
+	this->pos = pos;
+	this->vgm_time = vgm_time;
+	
+	return to_blip_time( end_time );
+}
+
+int Vgm_Emu_Impl::play_frame( blip_time_t blip_time, int sample_count, sample_t* buf )
+{
+	// to do: timing is working mostly by luck
+	
+	int min_pairs = sample_count >> 1; // stereo pairs requested
+	int vgm_time = ((long) min_pairs << fm_time_bits) / fm_time_factor - 1; // first guess, deliberately one unit low
+	assert( to_fm_time( vgm_time ) <= min_pairs );
+	int pairs = min_pairs;
+	while ( (pairs = to_fm_time( vgm_time )) < min_pairs ) // raise vgm_time until it maps to at least min_pairs
+		vgm_time++;
+	//debug_printf( "pairs: %d, min_pairs: %d\n", pairs, min_pairs );
+	
+	if ( ym2612.enabled() )
+	{
+		ym2612.begin_frame( buf );
+		memset( buf, 0, pairs * stereo * sizeof *buf ); // NOTE(review): presumably the YM2612 core adds into buf - confirm
+	}
+	else if ( ym2413.enabled() )
+	{
+		ym2413.begin_frame( buf );
+	}
+	
+	run_commands( vgm_time );
+	ym2612.run_until( pairs ); // generates the rest of the frame; a disabled chip is skipped
+	ym2413.run_until( pairs );
+	
+	fm_time_offset = (vgm_time * fm_time_factor + fm_time_offset) -
+			((long) pairs << fm_time_bits); // fixed-point remainder carried into the next frame
+	
+	psg.end_frame( blip_time );
+	
+	return pairs * stereo; // sample count actually produced; based on pairs, which is at least min_pairs
+}
+
+// Update pre-1.10 header FM rates by scanning commands (never reads or steps past data_end)
+void Vgm_Emu_Impl::update_fm_rates( long* ym2413_rate, long* ym2612_rate ) const
+{
+	byte const* p = data + 0x40;
+	while ( p < data_end )
+	{
+		long len; // size in bytes of the command at p, opcode included
+		switch ( *p )
+		{
+		case cmd_end:
+			return;
+		case cmd_psg: case cmd_byte_delay:
+			len = 2;
+			break;
+		case cmd_delay:
+			len = 3;
+			break;
+		case cmd_data_block:
+			if ( data_end - p < 7 ) // block header truncated: size field is not readable
+				return;
+			len = get_le32( p + 3 );
+			if ( len < 0 || len > data_end - p - 7 ) // payload larger than what is left
+				return;
+			len += 7;
+			break;
+		case cmd_ym2413: // first FM command is for the YM2413: its rate stays, the other is cleared
+			*ym2612_rate = 0;
+			return;
+		case cmd_ym2612_port0: case cmd_ym2612_port1: // first FM command is for the YM2612: move the rate over
+			*ym2612_rate = *ym2413_rate;
+			*ym2413_rate = 0;
+			return;
+		case cmd_ym2151: // YM2151 command first: both rates are cleared
+			*ym2413_rate = 0;
+			*ym2612_rate = 0;
+			return;
+		default:
+			len = command_len( *p );
+		}
+		if ( len > data_end - p ) // command would run off the end of the data
+			return;
+		p += len;
+	}
+}
diff --git a/libraries/game-music-emu/gme/Vgm_Emu_Impl.h b/libraries/game-music-emu/gme/Vgm_Emu_Impl.h
new file mode 100644
index 000000000..dadbb9207
--- /dev/null
+++ b/libraries/game-music-emu/gme/Vgm_Emu_Impl.h
@@ -0,0 +1,71 @@
+// Low-level parts of Vgm_Emu
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef VGM_EMU_IMPL_H
+#define VGM_EMU_IMPL_H
+
+#include "Dual_Resampler.h"
+#include "Classic_Emu.h"
+#include "Ym2413_Emu.h"
+#include "Ym2612_Emu.h"
+#include "Sms_Apu.h"
+
+template<class Emu>
+class Ym_Emu : public Emu {
+protected:
+	int last_time; // time reached by run_until() in the current frame, or disabled_time
+	short* out; // write cursor into the buffer given to begin_frame()
+	enum { disabled_time = -1 };
+public:
+	Ym_Emu()                        : last_time( disabled_time ), out( NULL ) { }
+	void enable( bool b )           { last_time = b ? 0 : disabled_time; }
+	bool enabled() const            { return last_time != disabled_time; }
+	void begin_frame( short* p );
+	int run_until( int time ); // returns false only when samples were requested while disabled
+};
+
+class Vgm_Emu_Impl : public Classic_Emu, private Dual_Resampler {
+public:
+	typedef Classic_Emu::sample_t sample_t;
+protected:
+	enum { stereo = 2 };
+	
+	typedef int vgm_time_t;
+	
+	enum { fm_time_bits = 12 };
+	typedef int fm_time_t;
+	long fm_time_offset;
+	int fm_time_factor;
+	fm_time_t to_fm_time( vgm_time_t ) const;
+	
+	enum { blip_time_bits = 12 };
+	int blip_time_factor;
+	blip_time_t to_blip_time( vgm_time_t ) const;
+	
+	byte const* data;
+	byte const* loop_begin;
+	byte const* data_end;
+	void update_fm_rates( long* ym2413_rate, long* ym2612_rate ) const;
+	
+	vgm_time_t vgm_time;
+	byte const* pos;
+	blip_time_t run_commands( vgm_time_t );
+	int play_frame( blip_time_t blip_time, int sample_count, sample_t* buf );
+	
+	byte const* pcm_data;
+	byte const* pcm_pos;
+	int dac_amp;
+	int dac_disabled; // -1 if disabled
+	void write_pcm( vgm_time_t, int amp );
+	
+	Ym_Emu<Ym2612_Emu> ym2612;
+	Ym_Emu<Ym2413_Emu> ym2413;
+	
+	Blip_Buffer blip_buf;
+	Sms_Apu psg;
+	Blip_Synth<blip_med_quality,1> dac_synth;
+	
+	friend class Vgm_Emu;
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Ym2413_Emu.cpp b/libraries/game-music-emu/gme/Ym2413_Emu.cpp
new file mode 100644
index 000000000..01e796d95
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ym2413_Emu.cpp
@@ -0,0 +1,21 @@
+
+// Use in place of Ym2413_Emu.cpp and ym2413.c to disable support for this chip
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Ym2413_Emu.h"
+
+Ym2413_Emu::Ym2413_Emu() : opll( 0 ) { } // stub owns no chip state; keep the pointer member null rather than indeterminate
+
+Ym2413_Emu::~Ym2413_Emu() { }
+
+int Ym2413_Emu::set_rate( double, double ) { return 2; }
+
+void Ym2413_Emu::reset() { }
+
+void Ym2413_Emu::write( int, int ) { }
+
+void Ym2413_Emu::mute_voices( int ) { }
+
+void Ym2413_Emu::run( int, sample_t* ) { }
+
diff --git a/libraries/game-music-emu/gme/Ym2413_Emu.h b/libraries/game-music-emu/gme/Ym2413_Emu.h
new file mode 100644
index 000000000..ed4fd11df
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ym2413_Emu.h
@@ -0,0 +1,33 @@
+// YM2413 FM sound chip emulator interface
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef YM2413_EMU_H
+#define YM2413_EMU_H
+
+class Ym2413_Emu  {
+	struct OPLL* opll;
+public:
+	Ym2413_Emu();
+	~Ym2413_Emu();
+	
+	// Set output sample rate and chip clock rates, in Hz. Returns non-zero
+	// if error.
+	int set_rate( double sample_rate, double clock_rate );
+	
+	// Reset to power-up state
+	void reset();
+	
+	// Mute voice n if bit n (1 << n) of mask is set
+	enum { channel_count = 14 };
+	void mute_voices( int mask );
+	
+	// Write 'data' to 'addr'
+	void write( int addr, int data );
+	
+	// Run and write pair_count samples to output
+	typedef short sample_t;
+	enum { out_chan_count = 2 }; // stereo
+	void run( int pair_count, sample_t* out );
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Ym2612_Emu.h b/libraries/game-music-emu/gme/Ym2612_Emu.h
new file mode 100644
index 000000000..f62209a07
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ym2612_Emu.h
@@ -0,0 +1,19 @@
+// YM2612 FM sound chip emulator interface
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+// Each VGM_YM2612_* macro, when defined, aliases that core as Ym2612_Emu; presumably the build defines exactly one.
+#ifdef VGM_YM2612_GENS // LGPL v2.1+ license
+#include "Ym2612_GENS.h"
+typedef Ym2612_GENS_Emu Ym2612_Emu;
+#endif
+
+#ifdef VGM_YM2612_NUKED // LGPL v2.1+ license
+#include "Ym2612_Nuked.h"
+typedef Ym2612_Nuked_Emu Ym2612_Emu;
+#endif
+
+#ifdef VGM_YM2612_MAME // GPL v2+ license
+#include "Ym2612_MAME.h"
+typedef Ym2612_MAME_Emu Ym2612_Emu;
+#endif
+
diff --git a/libraries/game-music-emu/gme/Ym2612_GENS.cpp b/libraries/game-music-emu/gme/Ym2612_GENS.cpp
new file mode 100644
index 000000000..d9930d62b
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ym2612_GENS.cpp
@@ -0,0 +1,1319 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+// Based on Gens 2.10 ym2612.c
+
+#include "Ym2612_GENS.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <stdio.h>
+#include <math.h>
+
+/* Copyright (C) 2002 Stéphane Dallongeville (gens AT consolemul.com) */
+/* Copyright (C) 2004-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+// This is mostly the original source in its C style and all.
+//
+// Somewhat optimized and simplified. Uses a template to generate the many
+// variants of Update_Chan. Rewrote header file. In need of full rewrite by
+// someone more familiar with FM sound and the YM2612. Has some inaccuracies
+// compared to the Sega Genesis sound, particularly being mixed at such a
+// high sample accuracy (the Genesis sounds like it has only 8 bit samples).
+// - Shay
+
+#ifdef BLARGG_ENABLE_OPTIMIZER
+	#include BLARGG_ENABLE_OPTIMIZER
+#endif
+
+const int output_bits = 14;
+
+struct slot_t
+{
+	const int *DT;  // detune parameter
+	int MUL;    // "frequency multiple" parameter
+	int TL;     // Total Level = volume when the envelope is at its peak
+	int TLL;    // Total Level, adjusted
+	int SLL;    // Sustain Level (adjusted) = volume at which the envelope ends its first decay phase
+	int KSR_S;  // Key Scale Rate Shift = how strongly key scaling is taken into account in the envelope rates
+	int KSR;    // Key Scale Rate = computed from the current frequency; it influences
+				// the various envelope parameters such as attack, decay ... as on the real chip!
+	int SEG;    // SSG envelope type
+	int env_xor;    // set by set_seg: 0 or ENV_MASK
+	int env_max;    // set by set_seg: INT_MAX or ENV_MASK
+
+	const int *AR;  // Attack Rate (table pointer) = attack rate (AR[KSR])
+	const int *DR;  // Decay Rate (table pointer) = decay rate (DR[KSR])
+	const int *SR;  // Sustain Rate (table pointer) = sustain rate (SR[KSR])
+	const int *RR;  // Release Rate (table pointer) = release rate (RR[KSR])
+	int Fcnt;   // Frequency Count = frequency counter used to determine the current amplitude (SIN[Finc >> 16])
+	int Finc;   // frequency step = increment of the frequency counter;
+				// the larger the step, the higher the pitch
+	int Ecurp;  // Envelope current phase = tells which phase of the envelope
+				// the slot is in, for example the attack phase or the sustain phase ...
+				// depending on the value of this variable, the matching routine is called
+				// to update the current envelope.
+	int Ecnt;   // Envelope counter = gives the current position within the envelope
+	int Einc;   // Current envelope step
+	int Ecmp;   // Envelope counter limit for the next phase
+	int EincA;  // Envelope step for Attack = counter increment during the attack phase;
+				// this value is equal to AR[KSR]
+	int EincD;  // Envelope step for Decay = counter increment during the decay phase;
+				// this value is equal to DR[KSR]
+	int EincS;  // Envelope step for Sustain = counter increment during the sustain phase;
+				// this value is equal to SR[KSR]
+	int EincR;  // Envelope step for Release = counter increment during the release phase;
+				// this value is equal to RR[KSR]
+	int *OUTp;  // pointer to SLOT output = connects the output of this slot to the input
+				// of another slot, or directly to the channel output
+	int INd;    // input data of the slot
+	int ChgEnM; // Change envelope mask.
+	int AMS;    // AMS depth level of this SLOT = amount of amplitude modulation by the LFO
+	int AMSon;  // AMS enable flag
+};
+
+struct channel_t
+{
+	int S0_OUT[4];          // previous outputs of slot 0 (for the feedback)
+	int LEFT;               // LEFT enable flag
+	int RIGHT;              // RIGHT enable flag
+	int ALGO;               // Algorithm = determines the connections between the operators
+	int FB;                 // shift count of self feedback = feedback amount of SLOT 1 (it is its own only input)
+	int FMS;                // Frequency Modulation Sensitivity of channel = amount of frequency modulation applied to the channel by the LFO
+	int AMS;                // Amplitude Modulation Sensitivity of channel = amount of amplitude modulation applied to the channel by the LFO
+	int FNUM[4];            // frequency number of the channel (+ 3 for the special mode)
+	int FOCT[4];            // octave of the channel (+ 3 for the special mode)
+	int KC[4];              // Key Code = value derived from the frequency (see KSR for the slots, KSR = KC >> KSR_S)
+	slot_t SLOT[4]; // four slot.operators = the 4 slots of the channel
+	int FFlag;              // Frequency step recalculation flag
+};
+
+struct state_t
+{
+	int TimerBase;      // TimerBase calculation
+	int Status;         // YM2612 Status (timer overflow)
+	int TimerA;         // timerA limit = value up to which timer A has to count
+	int TimerAL;
+	int TimerAcnt;      // timerA counter = current value of timer A
+	int TimerB;         // timerB limit = value up to which timer B has to count
+	int TimerBL;
+	int TimerBcnt;      // timerB counter = current value of timer B
+	int Mode;           // Current mode of channels 3 and 6 (normal / special)
+	int DAC;            // DAC enabled flag
+	channel_t CHANNEL[Ym2612_GENS_Emu::channel_count];   // The 6 channels of the YM2612
+	int REG[2][0x100];  // Saved values of all registers (makes debugging easier);
+						// write0/write1 also compare against it to skip unchanged writes
+};
+
+#ifndef PI
+#define PI 3.14159265358979323846
+#endif
+
+#define ATTACK    0
+#define DECAY     1
+#define SUBSTAIN  2
+#define RELEASE   3
+
+// SIN_LBITS <= 16
+// LFO_HBITS <= 16
+// (SIN_LBITS + SIN_HBITS) <= 26
+// (ENV_LBITS + ENV_HBITS) <= 28
+// (LFO_LBITS + LFO_HBITS) <= 28
+
+#define SIN_HBITS      12                               // Sinus phase counter int part
+#define SIN_LBITS      (26 - SIN_HBITS)                 // Sinus phase counter float part (best setting)
+
+#if (SIN_LBITS > 16)
+#define SIN_LBITS      16                               // Can't be greater than 16 bits
+#endif
+
+#define ENV_HBITS      12                               // Env phase counter int part
+#define ENV_LBITS      (28 - ENV_HBITS)                 // Env phase counter float part (best setting)
+
+#define LFO_HBITS      10                               // LFO phase counter int part
+#define LFO_LBITS      (28 - LFO_HBITS)                 // LFO phase counter float part (best setting)
+
+#define SIN_LENGHT     (1 << SIN_HBITS)
+#define ENV_LENGHT     (1 << ENV_HBITS)
+#define LFO_LENGHT     (1 << LFO_HBITS)
+
+#define TL_LENGHT      (ENV_LENGHT * 3)                 // Env + TL scaling + LFO
+
+#define SIN_MASK       (SIN_LENGHT - 1)
+#define ENV_MASK       (ENV_LENGHT - 1)
+#define LFO_MASK       (LFO_LENGHT - 1)
+
+#define ENV_STEP       (96.0 / ENV_LENGHT)              // ENV_MAX = 96 dB
+
+#define ENV_ATTACK     ((ENV_LENGHT * 0) << ENV_LBITS)
+#define ENV_DECAY      ((ENV_LENGHT * 1) << ENV_LBITS)
+#define ENV_END        ((ENV_LENGHT * 2) << ENV_LBITS)
+
+#define MAX_OUT_BITS   (SIN_HBITS + SIN_LBITS + 2)      // Modulation = -4 <--> +4
+#define MAX_OUT        ((1 << MAX_OUT_BITS) - 1)
+
+#define PG_CUT_OFF     ((int) (78.0 / ENV_STEP))
+#define ENV_CUT_OFF    ((int) (68.0 / ENV_STEP))
+
+#define AR_RATE        399128
+#define DR_RATE        5514396
+
+//#define AR_RATE        426136
+//#define DR_RATE        (AR_RATE * 12)
+
+#define LFO_FMS_LBITS  9    // FIXED (with 9 bits LFO_FMS_BASE truncates to 1)
+#define LFO_FMS_BASE   ((int) (0.05946309436 * 0.0338 * (double) (1 << LFO_FMS_LBITS)))
+
+#define S0             0    // Slot indices into channel_t::SLOT; note that S1 and S2
+#define S1             2    // are swapped relative to their names (original comment:
+#define S2             1    // "Stupid typo of the YM2612")
+#define S3             3
+
+// Record SSG-EG mode 'seg' in the slot and derive env_xor / env_max from bit 2 (value 4).
+inline void set_seg( slot_t& s, int seg )
+{
+	bool const negate = (seg & 4) != 0;
+
+	s.SEG = seg;
+	// With bit 2 set both fields become ENV_MASK; otherwise the XOR is a
+	// no-op (0) and the limit is INT_MAX.
+	s.env_xor = negate ? ENV_MASK : 0;
+	s.env_max = negate ? ENV_MASK : INT_MAX;
+}
+
+struct tables_t
+{
+	short SIN_TAB [SIN_LENGHT];                 // SINE TABLE (offset into TL TABLE)
+	int LFOcnt;         // LFO counter = frequency counter for the LFO
+	int LFOinc;         // LFO step counter = increment of the LFO frequency counter;
+						// the larger the step, the higher the frequency
+	unsigned int AR_TAB [128];                  // Attack rate table
+	unsigned int DR_TAB [96];                   // Decay rate table
+	unsigned int DT_TAB [8] [32];               // Detune table
+	unsigned int SL_TAB [16];                   // Sustain level table
+	unsigned int NULL_RATE [32];                // Table for NULL rate
+	int LFO_INC_TAB [8];                        // LFO step table
+
+	short ENV_TAB [2 * ENV_LENGHT + 8];         // ENV CURVE TABLE (attack & decay)
+
+	short LFO_ENV_TAB [LFO_LENGHT];             // LFO AMS TABLE (adjusted for 11.8 dB)
+	short LFO_FREQ_TAB [LFO_LENGHT];            // LFO FMS TABLE
+	int TL_TAB [TL_LENGHT * 2];                 // TOTAL LEVEL TABLE (positive and negative)
+	unsigned int DECAY_TO_ATTACK [ENV_LENGHT];  // Conversion from decay to attack phase
+	unsigned int FINC_TAB [2048];               // Frequency step table
+};
+
+static const unsigned char DT_DEF_TAB [4 * 32] = // base detune values, 4 rows of 32; set_rate scales them into g.DT_TAB (rows 4-7 negated)
+{
+// FD = 0
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+// FD = 1
+  0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
+  2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 8, 8, 8, 8,
+
+// FD = 2
+  1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5,
+  5, 6, 6, 7, 8, 8, 9, 10, 11, 12, 13, 14, 16, 16, 16, 16,
+
+// FD = 3
+  2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7,
+  8 , 8, 9, 10, 11, 12, 13, 14, 16, 17, 19, 20, 22, 22, 22, 22
+};
+
+static const unsigned char FKEY_TAB [16] = // indexed by FNUM >> 7; OR-ed with (FOCT << 2) to form the key code
+{
+	0, 0, 0, 0,
+	0, 0, 0, 1,
+	2, 3, 3, 3,
+	3, 3, 3, 3
+};
+// AMS shift counts, indexed by bits 4-5 of register 0xB4; applied as 'env_LFO >> AMS' in the channel update
+static const unsigned char LFO_AMS_TAB [4] =
+{
+	31, 4, 1, 0
+};
+// FMS values, indexed by bits 0-2 of register 0xB4
+static const unsigned char LFO_FMS_TAB [8] =
+{
+	LFO_FMS_BASE * 0, LFO_FMS_BASE * 1,
+	LFO_FMS_BASE * 2, LFO_FMS_BASE * 3,
+	LFO_FMS_BASE * 4, LFO_FMS_BASE * 6,
+	LFO_FMS_BASE * 12, LFO_FMS_BASE * 24
+};
+// Empty no-op; it is still called from the register-write handlers in this file.
+inline void YM2612_Special_Update() { }
+
+struct Ym2612_GENS_Impl
+{
+	enum { channel_count = Ym2612_GENS_Emu::channel_count };
+
+	state_t YM2612;     // registers, timers and per-channel state
+	int mute_mask;      // written by Ym2612_GENS_Emu::mute_voices
+	tables_t g;         // lookup tables, filled in by set_rate
+	// Key on / key off for one slot: (channel, slot index)
+	void KEY_ON( channel_t&, int );
+	void KEY_OFF( channel_t&, int );
+	int SLOT_SET( int, int );
+	int CHANNEL_SET( int, int );
+	int YM_SET( int, int );
+	// The *_SET handlers above take (address, data); they return 1 when the write names channel index 3, else 0
+	void set_rate( double sample_rate, double clock_factor ); // the definition names the second parameter clock_rate
+	void reset();
+	void write0( int addr, int data );  // register bank 0
+	void write1( int addr, int data );  // register bank 1 (forwarded with address bit 0x100 set)
+	void run_timer( int );
+	void run( int pair_count, Ym2612_GENS_Emu::sample_t* );
+};
+
+// Key-on for slot 'nsl' of 'ch': enters the ATTACK phase, but only from RELEASE.
+void Ym2612_GENS_Impl::KEY_ON( channel_t& ch, int nsl)
+{
+	slot_t& slot = ch.SLOT [nsl];
+
+	if ( slot.Ecurp != RELEASE )    // key is already down: leave the slot alone
+		return;
+
+	slot.Fcnt = 0;
+
+	// Fix Ecco 2 splash sound: map the current envelope level onto the attack
+	// curve instead of restarting from 0. ChgEnM masks the result, so a mask of
+	// 0 still forces a restart; the mask is then set back to all ones.
+	int const level = g.ENV_TAB [slot.Ecnt >> ENV_LBITS];
+	slot.Ecnt = (g.DECAY_TO_ATTACK [level] + ENV_ATTACK) & slot.ChgEnM;
+	slot.ChgEnM = ~0;
+
+	slot.Einc = slot.EincA;
+	slot.Ecmp = ENV_DECAY;
+	slot.Ecurp = ATTACK;
+}
+
+
+// Key-off for slot 'nsl' of 'ch': enters the RELEASE phase unless already there.
+void Ym2612_GENS_Impl::KEY_OFF(channel_t& ch, int nsl)
+{
+	slot_t& slot = ch.SLOT [nsl];
+
+	if ( slot.Ecurp == RELEASE )    // key is already up
+		return;
+
+	// Still in the attack range: translate the counter into the decay range via ENV_TAB.
+	if ( slot.Ecnt < ENV_DECAY )
+		slot.Ecnt = (g.ENV_TAB [slot.Ecnt >> ENV_LBITS] << ENV_LBITS) + ENV_DECAY;
+
+	slot.Einc = slot.EincR;
+	slot.Ecmp = ENV_END;
+	slot.Ecurp = RELEASE;
+}
+
+
+int Ym2612_GENS_Impl::SLOT_SET( int Adr, int data ) // slot (operator) register write; returns 1 for channel index 3, else 0
+{
+	int nch = Adr & 3;
+	if ( nch == 3 )
+		return 1;
+	// Adr bit 0x100 selects the second bank (channels 3-5); Adr bits 2-3 select the slot.
+	channel_t& ch = YM2612.CHANNEL [nch + (Adr & 0x100 ? 3 : 0)];
+	slot_t& sl = ch.SLOT [(Adr >> 2) & 3];
+	// The high nibble of the low address byte selects which parameter is written.
+	switch ( Adr & 0xF0 )
+	{
+		case 0x30: // detune (bits 4-6) and frequency multiple (bits 0-3)
+			if ( (sl.MUL = (data & 0x0F)) != 0 ) sl.MUL <<= 1;
+			else sl.MUL = 1;
+			// MUL is kept doubled, with 0 stored as 1.
+			sl.DT = (int*) g.DT_TAB [(data >> 4) & 7];
+			// -1 requests recalculation of the phase step (see the comment in YM_SET, case 0x27).
+			ch.SLOT [0].Finc = -1;
+
+			break;
+
+		case 0x40: // total level (7 bits)
+			sl.TL = data & 0x7F;
+
+			// SOR2 does a lot of TL adjustment and this fixes the R.Shinobi jump sound...
+			YM2612_Special_Update();
+			// Scale the 7-bit TL to the ENV_HBITS-bit envelope range.
+#if ((ENV_HBITS - 7) < 0)
+			sl.TLL = sl.TL >> (7 - ENV_HBITS);
+#else
+			sl.TLL = sl.TL << (ENV_HBITS - 7);
+#endif
+
+			break;
+
+		case 0x50: // key scale rate shift (bits 6-7) and attack rate (bits 0-4)
+			sl.KSR_S = 3 - (data >> 6);
+
+			ch.SLOT [0].Finc = -1;
+			// Rate 0 uses the all-zero NULL_RATE table; other rates enter AR_TAB at rate * 2.
+			if (data &= 0x1F) sl.AR = (int*) &g.AR_TAB [data << 1];
+			else sl.AR = (int*) &g.NULL_RATE [0];
+
+			sl.EincA = sl.AR [sl.KSR];
+			if (sl.Ecurp == ATTACK) sl.Einc = sl.EincA;
+			break;
+
+		case 0x60: // AMS enable (bit 7) and decay rate (bits 0-4)
+			if ( (sl.AMSon = (data & 0x80)) != 0 ) sl.AMS = ch.AMS;
+			else sl.AMS = 31;
+
+			if (data &= 0x1F) sl.DR = (int*) &g.DR_TAB [data << 1];
+			else sl.DR = (int*) &g.NULL_RATE [0];
+
+			sl.EincD = sl.DR [sl.KSR];
+			if (sl.Ecurp == DECAY) sl.Einc = sl.EincD;
+			break;
+
+		case 0x70: // sustain rate (bits 0-4); uses DR_TAB like the decay rate
+			if (data &= 0x1F) sl.SR = (int*) &g.DR_TAB [data << 1];
+			else sl.SR = (int*) &g.NULL_RATE [0];
+
+			sl.EincS = sl.SR [sl.KSR];
+			if ((sl.Ecurp == SUBSTAIN) && (sl.Ecnt < ENV_END)) sl.Einc = sl.EincS;
+			break;
+
+		case 0x80: // sustain level (bits 4-7) and release rate (bits 0-3)
+			sl.SLL = g.SL_TAB [data >> 4];
+
+			sl.RR = (int*) &g.DR_TAB [((data & 0xF) << 2) + 2];
+
+			sl.EincR = sl.RR [sl.KSR];
+			if ((sl.Ecurp == RELEASE) && (sl.Ecnt < ENV_END)) sl.Einc = sl.EincR;
+			break;
+
+		case 0x90:
+			// SSG-EG envelope shapes :
+			/*
+			   E  At Al H
+
+			   1  0  0  0  \\\\
+			   1  0  0  1  \___
+			   1  0  1  0  \/\/
+			   1  0  1  1  \
+			   1  1  0  0  ////
+			   1  1  0  1  /
+			   1  1  1  0  /\/\
+			   1  1  1  1  /___
+
+			   E  = SSG-EG enable
+			   At = Start negate
+			   Al = Altern
+			   H  = Hold */
+			// The mode is stored only when the enable bit (8) is set; otherwise mode 0 is used.
+			set_seg( sl, (data & 8) ? (data & 0x0F) : 0 );
+			break;
+	}
+
+	return 0;
+}
+
+
+int Ym2612_GENS_Impl::CHANNEL_SET( int Adr, int data ) // channel register write; returns 1 for channel index 3, else 0
+{
+	int num = Adr & 3;
+	if ( num == 3 )
+		return 1;
+	// Adr bit 0x100 selects the second bank (channels 3-5).
+	channel_t& ch = YM2612.CHANNEL [num + (Adr & 0x100 ? 3 : 0)];
+
+	switch ( Adr & 0xFC )
+	{
+		case 0xA0: // frequency number, low 8 bits
+			YM2612_Special_Update();
+
+			ch.FNUM [0] = (ch.FNUM [0] & 0x700) + data;
+			ch.KC [0] = (ch.FOCT [0] << 2) | FKEY_TAB [ch.FNUM [0] >> 7];
+
+			ch.SLOT [0].Finc = -1;
+			break;
+
+		case 0xA4: // frequency number bits 8-10 (data bits 0-2) and octave (data bits 3-5)
+			YM2612_Special_Update();
+
+			ch.FNUM [0] = (ch.FNUM [0] & 0x0FF) + ((data & 0x07) << 8);
+			ch.FOCT [0] = (data & 0x38) >> 3;
+			ch.KC [0] = (ch.FOCT [0] << 2) | FKEY_TAB [ch.FNUM [0] >> 7];
+
+			ch.SLOT [0].Finc = -1;
+			break;
+
+		case 0xA8: // like 0xA0, but for entries 1-3 of CHANNEL [2]; first bank only
+			if ( Adr < 0x100 )
+			{
+				num++;
+
+				YM2612_Special_Update();
+
+				YM2612.CHANNEL [2].FNUM [num] = (YM2612.CHANNEL [2].FNUM [num] & 0x700) + data;
+				YM2612.CHANNEL [2].KC [num] = (YM2612.CHANNEL [2].FOCT [num] << 2) |
+						FKEY_TAB [YM2612.CHANNEL [2].FNUM [num] >> 7];
+
+				YM2612.CHANNEL [2].SLOT [0].Finc = -1;
+			}
+			break;
+
+		case 0xAC: // like 0xA4, but for entries 1-3 of CHANNEL [2]; first bank only
+			if ( Adr < 0x100 )
+			{
+				num++;
+
+				YM2612_Special_Update();
+
+				YM2612.CHANNEL [2].FNUM [num] = (YM2612.CHANNEL [2].FNUM [num] & 0x0FF) + ((data & 0x07) << 8);
+				YM2612.CHANNEL [2].FOCT [num] = (data & 0x38) >> 3;
+				YM2612.CHANNEL [2].KC [num] = (YM2612.CHANNEL [2].FOCT [num] << 2) |
+						FKEY_TAB [YM2612.CHANNEL [2].FNUM [num] >> 7];
+
+				YM2612.CHANNEL [2].SLOT [0].Finc = -1;
+			}
+			break;
+
+		case 0xB0: // algorithm (bits 0-2) and feedback (bits 3-5)
+			if ( ch.ALGO != (data & 7) )
+			{
+				// Fix VectorMan 2 heli sound (level 1)
+				YM2612_Special_Update();
+
+				ch.ALGO = data & 7;
+				// A zero ChgEnM makes the next KEY_ON restart the envelope counter at 0.
+				ch.SLOT [0].ChgEnM = 0;
+				ch.SLOT [1].ChgEnM = 0;
+				ch.SLOT [2].ChgEnM = 0;
+				ch.SLOT [3].ChgEnM = 0;
+			}
+
+			ch.FB = 9 - ((data >> 3) & 7);                              // Real thing ?
+
+//          if (ch.FB = ((data >> 3) & 7)) ch.FB = 9 - ch.FB;       // Thunder force 4 (music stage 8), Gynoug, Aladdin bug sound...
+//          else ch.FB = 31;
+			break;
+
+		case 0xB4: { // output enables (bits 7 and 6), AMS (bits 4-5) and FMS (bits 0-2)
+			YM2612_Special_Update();
+
+			ch.LEFT = 0 - ((data >> 7) & 1);    // 0 or -1 (all bits set)
+			ch.RIGHT = 0 - ((data >> 6) & 1);   // 0 or -1 (all bits set)
+
+			ch.AMS = LFO_AMS_TAB [(data >> 4) & 3];
+			ch.FMS = LFO_FMS_TAB [data & 7];
+			// Propagate the new channel AMS to the slots that have AMS enabled; the others keep 31.
+			for ( int i = 0; i < 4; i++ )
+			{
+				slot_t& sl = ch.SLOT [i];
+				sl.AMS = (sl.AMSon ? ch.AMS : 31);
+			}
+			break;
+		}
+	}
+
+	return 0;
+}
+
+
+int Ym2612_GENS_Impl::YM_SET(int Adr, int data) // global register write; returns 1 only for key on/off with channel index 3, else 0
+{
+	switch ( Adr )
+	{
+		case 0x22:
+			if (data & 8) // LFO enable
+			{
+				// Cool Spot music 1: the LFO is modified several times, which
+				// distorts the sound; this has to be checked on a real Genesis...
+
+				g.LFOinc = g.LFO_INC_TAB [data & 7];
+			}
+			else
+			{
+				g.LFOinc = g.LFOcnt = 0;
+			}
+			break;
+
+		case 0x24: // timer A, upper 8 bits of the 10-bit value
+			YM2612.TimerA = (YM2612.TimerA & 0x003) | (((int) data) << 2);
+
+			if (YM2612.TimerAL != (1024 - YM2612.TimerA) << 12)
+			{
+				YM2612.TimerAcnt = YM2612.TimerAL = (1024 - YM2612.TimerA) << 12;
+			}
+			break;
+
+		case 0x25: // timer A, lower 2 bits of the 10-bit value
+			YM2612.TimerA = (YM2612.TimerA & 0x3FC) | (data & 3);
+
+			if (YM2612.TimerAL != (1024 - YM2612.TimerA) << 12)
+			{
+				YM2612.TimerAcnt = YM2612.TimerAL = (1024 - YM2612.TimerA) << 12;
+			}
+			break;
+
+		case 0x26: // timer B
+			YM2612.TimerB = data;
+
+			if (YM2612.TimerBL != (256 - YM2612.TimerB) << (4 + 12))
+			{
+				YM2612.TimerBcnt = YM2612.TimerBL = (256 - YM2612.TimerB) << (4 + 12);
+			}
+			break;
+
+		case 0x27:
+			// Miscellaneous parameters
+			// b7 = CSM MODE
+			// b6 = 3 slot mode
+			// b5 = reset b
+			// b4 = reset a
+			// b3 = timer enable b
+			// b2 = timer enable a
+			// b1 = load b
+			// b0 = load a
+
+			if ((data ^ YM2612.Mode) & 0x40)
+			{
+				// We changed the channel 2 mode, so recalculate phase step
+				// This fixes the punch sound in Streets of Rage 2
+
+				YM2612_Special_Update();
+
+				YM2612.CHANNEL [2].SLOT [0].Finc = -1;      // recalculate phase step
+			}
+
+//          if ((data & 2) && (YM2612.Status & 2)) YM2612.TimerBcnt = YM2612.TimerBL;
+//          if ((data & 1) && (YM2612.Status & 1)) YM2612.TimerAcnt = YM2612.TimerAL;
+
+//          YM2612.Status &= (~data >> 4);                  // Reset Status if requested
+			YM2612.Status &= (~data >> 4) & (data >> 2);    // Reset Status
+
+			YM2612.Mode = data;
+			break;
+
+		case 0x28: { // key on/off: data bits 0-2 select the channel, bits 4-7 the slots
+			int nch = data & 3;
+			if ( nch == 3 )
+				return 1;
+			if ( data & 4 )
+				nch += 3;
+			channel_t& ch = YM2612.CHANNEL [nch];
+
+			YM2612_Special_Update();
+
+			if (data & 0x10) KEY_ON(ch, S0);    // Key on for slot 1
+			else KEY_OFF(ch, S0);               // Key off for slot 1
+			if (data & 0x20) KEY_ON(ch, S1);    // Key on for slot 3
+			else KEY_OFF(ch, S1);               // Key off for slot 3
+			if (data & 0x40) KEY_ON(ch, S2);    // Key on for slot 2
+			else KEY_OFF(ch, S2);               // Key off for slot 2
+			if (data & 0x80) KEY_ON(ch, S3);    // Key on for slot 4
+			else KEY_OFF(ch, S3);               // Key off for slot 4
+			break;
+		}
+
+		case 0x2B:
+			if (YM2612.DAC ^ (data & 0x80)) YM2612_Special_Update();
+
+			YM2612.DAC = data & 0x80;   // enable/disable the DAC
+			break;
+	}
+
+	return 0;
+}
+
+void Ym2612_GENS_Impl::set_rate( double sample_rate, double clock_rate )
+{
+	assert( sample_rate );
+	assert( clock_rate > sample_rate );
+	// Fills every table in 'g' for the given rates and finishes by calling reset().
+	int i;
+
+	// 144 = 12 * (prescale * 2) = 12 * 6 * 2
+	// prescale set to 6 by default
+	// Frequence = (clock_rate / 144) / sample_rate, snapped to exactly 1.0 when within 1e-7.
+	double Frequence = clock_rate / sample_rate / 144.0;
+	if ( fabs( Frequence - 1.0 ) < 0.0000001 )
+		Frequence = 1.0;
+	YM2612.TimerBase = int (Frequence * 4096.0);
+
+	// TL table:
+	// [0     -  4095] = +output  [4095  - ...] = +output overflow (fill with 0)
+	// [12288 - 16383] = -output  [16384 - ...] = -output overflow (fill with 0)
+
+	for(i = 0; i < TL_LENGHT; i++)
+	{
+		if (i >= PG_CUT_OFF)    // YM2612 cut off sound after 78 dB (14 bits output ?)
+		{
+			g.TL_TAB [TL_LENGHT + i] = g.TL_TAB [i] = 0;
+		}
+		else
+		{
+			double x = MAX_OUT;                         // Max output
+			x /= pow( 10.0, (ENV_STEP * i) / 20.0 );    // Decibel -> Voltage
+
+			g.TL_TAB [i] = (int) x;
+			g.TL_TAB [TL_LENGHT + i] = -g.TL_TAB [i];
+		}
+	}
+
+	// SIN table:
+	// g.SIN_TAB [x] [y] = sin(x) * y;
+	// x = phase and y = volume
+
+	g.SIN_TAB [0] = g.SIN_TAB [SIN_LENGHT / 2] = PG_CUT_OFF;
+
+	for(i = 1; i <= SIN_LENGHT / 4; i++)
+	{
+		double x = sin(2.0 * PI * (double) (i) / (double) (SIN_LENGHT));    // Sine
+		x = 20 * log10(1 / x);                                      // convert to dB
+
+		int j = (int) (x / ENV_STEP);                       // Get TL range
+
+		if (j > PG_CUT_OFF) j = (int) PG_CUT_OFF;
+		// Mirror the quarter wave; the second half of the period points into the negative half of TL_TAB.
+		g.SIN_TAB [i] = g.SIN_TAB [(SIN_LENGHT / 2) - i] = j;
+		g.SIN_TAB [(SIN_LENGHT / 2) + i] = g.SIN_TAB [SIN_LENGHT - i] = TL_LENGHT + j;
+	}
+
+	// LFO table (LFO waveform):
+
+	for(i = 0; i < LFO_LENGHT; i++)
+	{
+		double x = sin(2.0 * PI * (double) (i) / (double) (LFO_LENGHT));    // Sine
+		x += 1.0;
+		x /= 2.0;                   // positive only
+		x *= 11.8 / ENV_STEP;       // adjusted to MAX envelope modulation
+
+		g.LFO_ENV_TAB [i] = (int) x;
+
+		x = sin(2.0 * PI * (double) (i) / (double) (LFO_LENGHT));   // Sine
+		x *= (double) ((1 << (LFO_HBITS - 1)) - 1);
+
+		g.LFO_FREQ_TAB [i] = (int) x;
+
+	}
+
+	// Envelope table:
+	// g.ENV_TAB [0] -> g.ENV_TAB [ENV_LENGHT - 1]              = attack curve
+	// g.ENV_TAB [ENV_LENGHT] -> g.ENV_TAB [2 * ENV_LENGHT - 1] = decay curve
+
+	for(i = 0; i < ENV_LENGHT; i++)
+	{
+		// Attack curve (x^8 - music level 2 Vectorman 2)
+		double x = pow(((double) ((ENV_LENGHT - 1) - i) / (double) (ENV_LENGHT)), 8);
+		x *= ENV_LENGHT;
+
+		g.ENV_TAB [i] = (int) x;
+
+		// Decay curve (just linear)
+		x = pow(((double) (i) / (double) (ENV_LENGHT)), 1);
+		x *= ENV_LENGHT;
+
+		g.ENV_TAB [ENV_LENGHT + i] = (int) x;
+	}
+	for ( i = 0; i < 8; i++ )
+		g.ENV_TAB [i + ENV_LENGHT * 2] = 0;
+
+	g.ENV_TAB [ENV_END >> ENV_LBITS] = ENV_LENGHT - 1;      // for the stopped state
+
+	// Table for the Attack -> Decay and Decay -> Attack conversion
+
+	int j = ENV_LENGHT - 1;
+	for ( i = 0; i < ENV_LENGHT; i++ )
+	{
+		while ( j && g.ENV_TAB [j] < i )
+			j--;
+
+		g.DECAY_TO_ATTACK [i] = j << ENV_LBITS;
+	}
+
+	// Sustain level table
+
+	for(i = 0; i < 15; i++)
+	{
+		double x = i * 3;           // 3 and not 6 (Mickey Mania first music for test)
+		x /= ENV_STEP;
+
+		g.SL_TAB [i] = ((int) x << ENV_LBITS) + ENV_DECAY;
+	}
+
+	g.SL_TAB [15] = ((ENV_LENGHT - 1) << ENV_LBITS) + ENV_DECAY; // special case : volume off
+
+	// Frequency step table
+
+	for(i = 0; i < 2048; i++)
+	{
+		double x = (double) (i) * Frequence;
+
+#if ((SIN_LBITS + SIN_HBITS - (21 - 7)) < 0)
+		x /= (double) (1 << ((21 - 7) - SIN_LBITS - SIN_HBITS));
+#else
+		x *= (double) (1 << (SIN_LBITS + SIN_HBITS - (21 - 7)));
+#endif
+
+		x /= 2.0;   // because MUL = value * 2
+
+		g.FINC_TAB [i] = (unsigned int) x;
+	}
+
+	// Attack & decay rate tables
+
+	for(i = 0; i < 4; i++)
+	{
+		g.AR_TAB [i] = 0;
+		g.DR_TAB [i] = 0;
+	}
+
+	for(i = 0; i < 60; i++)
+	{
+		double x = Frequence;
+
+		x *= 1.0 + ((i & 3) * 0.25);                    // bits 0-1 : x1.00, x1.25, x1.50, x1.75
+		x *= (double) (1 << ((i >> 2)));                // bits 2-5 : shift bits (x2^0 - x2^15)
+		x *= (double) (ENV_LENGHT << ENV_LBITS);        // scale for the g.ENV_TAB table
+
+		g.AR_TAB [i + 4] = (unsigned int) (x / AR_RATE);
+		g.DR_TAB [i + 4] = (unsigned int) (x / DR_RATE);
+	}
+	// Entries 64-95 repeat the fastest rate; the same loop also clears NULL_RATE.
+	for(i = 64; i < 96; i++)
+	{
+		g.AR_TAB [i] = g.AR_TAB [63];
+		g.DR_TAB [i] = g.DR_TAB [63];
+
+		g.NULL_RATE [i - 64] = 0;
+	}
+
+	for ( i = 96; i < 128; i++ )
+		g.AR_TAB [i] = 0;
+
+	// Detune table
+
+	for(i = 0; i < 4; i++)
+	{
+		for (int j = 0; j < 32; j++)
+		{
+#if ((SIN_LBITS + SIN_HBITS - 21) < 0)
+			double y = (double) DT_DEF_TAB [(i << 5) + j] * Frequence / (double) (1 << (21 - SIN_LBITS - SIN_HBITS));
+#else
+			double y = (double) DT_DEF_TAB [(i << 5) + j] * Frequence * (double) (1 << (SIN_LBITS + SIN_HBITS - 21));
+#endif
+			// Rows 0-3 hold the positive detune values, rows 4-7 the same values negated.
+			g.DT_TAB [i + 0] [j] = (int)  y;
+			g.DT_TAB [i + 4] [j] = (int) -y;
+		}
+	}
+
+	// LFO step table
+	g.LFO_INC_TAB [0] = (unsigned int) (3.98 * (double) (1 << (LFO_HBITS + LFO_LBITS)) / sample_rate);
+	g.LFO_INC_TAB [1] = (unsigned int) (5.56 * (double) (1 << (LFO_HBITS + LFO_LBITS)) / sample_rate);
+	g.LFO_INC_TAB [2] = (unsigned int) (6.02 * (double) (1 << (LFO_HBITS + LFO_LBITS)) / sample_rate);
+	g.LFO_INC_TAB [3] = (unsigned int) (6.37 * (double) (1 << (LFO_HBITS + LFO_LBITS)) / sample_rate);
+	g.LFO_INC_TAB [4] = (unsigned int) (6.88 * (double) (1 << (LFO_HBITS + LFO_LBITS)) / sample_rate);
+	g.LFO_INC_TAB [5] = (unsigned int) (9.63 * (double) (1 << (LFO_HBITS + LFO_LBITS)) / sample_rate);
+	g.LFO_INC_TAB [6] = (unsigned int) (48.1 * (double) (1 << (LFO_HBITS + LFO_LBITS)) / sample_rate);
+	g.LFO_INC_TAB [7] = (unsigned int) (72.2 * (double) (1 << (LFO_HBITS + LFO_LBITS)) / sample_rate);
+
+	reset();
+}
+
+const char* Ym2612_GENS_Emu::set_rate( double sample_rate, double clock_rate )
+{
+	if ( !impl )
+	{
+		impl = (Ym2612_GENS_Impl*) malloc( sizeof *impl );
+		if ( !impl )
+			return "Out of memory";
+		impl->mute_mask = 0;
+	}
+	memset( &impl->YM2612, 0, sizeof impl->YM2612 );
+
+	impl->set_rate( sample_rate, clock_rate );
+
+	return 0;
+}
+
+Ym2612_GENS_Emu::~Ym2612_GENS_Emu()
+{
+	free( impl ); // impl is allocated with malloc() in set_rate; free() also accepts a null pointer
+}
+
+// Write to register bank 0. Global registers (below 0x30) are always applied;
+// slot/channel registers only when the cached value actually changes.
+inline void Ym2612_GENS_Impl::write0( int opn_addr, int data )
+{
+	assert( (unsigned) data <= 0xFF );
+
+	int& cached = YM2612.REG [0] [opn_addr];
+	if ( opn_addr >= 0x30 && cached == data )
+		return;
+
+	cached = data;
+
+	if ( opn_addr < 0x30 )
+		YM_SET( opn_addr, data );
+	else if ( opn_addr < 0xA0 )
+		SLOT_SET( opn_addr, data );
+	else
+		CHANNEL_SET( opn_addr, data );
+}
+
+// Write to register bank 1 (channels 3-5). Addresses below 0x30 and writes
+// that do not change the cached register value are ignored.
+inline void Ym2612_GENS_Impl::write1( int opn_addr, int data )
+{
+	assert( (unsigned) data <= 0xFF );
+
+	if ( opn_addr < 0x30 || YM2612.REG [1] [opn_addr] == data )
+		return;
+
+	YM2612.REG [1] [opn_addr] = data;
+	int const banked = opn_addr + 0x100;   // bit 0x100 marks the second bank
+	if ( opn_addr < 0xA0 ) SLOT_SET( banked, data );
+	else                   CHANNEL_SET( banked, data );
+}
+
+void Ym2612_GENS_Emu::reset()
+{
+	impl->reset(); // impl is allocated in set_rate; NOTE(review): presumably set_rate must have succeeded first
+}
+
+void Ym2612_GENS_Impl::reset()
+{
+	g.LFOcnt = 0;
+	YM2612.TimerA = 0;
+	YM2612.TimerAL = 0;
+	YM2612.TimerAcnt = 0;
+	YM2612.TimerB = 0;
+	YM2612.TimerBL = 0;
+	YM2612.TimerBcnt = 0;
+	YM2612.DAC = 0;
+
+	YM2612.Status = 0;
+
+	int i;
+	for ( i = 0; i < channel_count; i++ )
+	{
+		channel_t& ch = YM2612.CHANNEL [i];
+
+		ch.LEFT = ~0;
+		ch.RIGHT = ~0;
+		ch.ALGO = 0;
+		ch.FB = 31;
+		ch.FMS = 0;
+		ch.AMS = 0;
+
+		for ( int j = 0 ;j < 4 ; j++ )
+		{
+			ch.S0_OUT [j] = 0;
+			ch.FNUM [j] = 0;
+			ch.FOCT [j] = 0;
+			ch.KC [j] = 0;
+
+			ch.SLOT [j].Fcnt = 0;
+			ch.SLOT [j].Finc = 0;
+			ch.SLOT [j].Ecnt = ENV_END;     // Put it at the end of Decay phase...
+			ch.SLOT [j].Einc = 0;
+			ch.SLOT [j].Ecmp = 0;
+			ch.SLOT [j].Ecurp = RELEASE;
+
+			ch.SLOT [j].ChgEnM = 0;
+		}
+	}
+	// -1 never equals a byte value, so every write below is treated as a change by write0/write1.
+	for ( i = 0; i < 0x100; i++ )
+	{
+		YM2612.REG [0] [i] = -1;
+		YM2612.REG [1] [i] = -1;
+	}
+	// 0xC0 sets bits 7 and 6, which CHANNEL_SET (case 0xB4) turns into LEFT and RIGHT enabled.
+	for ( i = 0xB6; i >= 0xB4; i-- )
+	{
+		write0( i, 0xC0 );
+		write1( i, 0xC0 );
+	}
+	// Write 0 to all remaining registers, from 0xB2 down to 0x22, in both banks.
+	for ( i = 0xB2; i >= 0x22; i-- )
+	{
+		write0( i, 0 );
+		write1( i, 0 );
+	}
+	// NOTE(review): YM_SET has no case for 0x2A, so this only stores 0x80 in REG [0] [0x2A].
+	write0( 0x2A, 0x80 );
+}
+
+void Ym2612_GENS_Emu::write0( int addr, int data )
+{
+	impl->write0( addr, data ); // forwards to register bank 0 of the implementation
+}
+
+void Ym2612_GENS_Emu::write1( int addr, int data )
+{
+	impl->write1( addr, data ); // forwards to register bank 1 of the implementation
+}
+// Stores the mask only. All three wrappers dereference impl, which is allocated in set_rate.
+void Ym2612_GENS_Emu::mute_voices( int mask ) { impl->mute_mask = mask; }
+
+static void update_envelope_( slot_t* sl ) // called by update_envelope once Ecnt has reached Ecmp: moves the slot to its next phase
+{
+	switch ( sl->Ecurp )
+	{
+	case 0: // ATTACK
+		// Env_Attack_Next
+
+		// Verified with Gynoug even in HQ (explode SFX)
+		sl->Ecnt = ENV_DECAY;
+
+		sl->Einc = sl->EincD;
+		sl->Ecmp = sl->SLL;
+		sl->Ecurp = DECAY;
+		break;
+
+	case 1: // DECAY
+		// Env_Decay_Next
+
+		// Verified with Gynoug even in HQ (explode SFX)
+		sl->Ecnt = sl->SLL;
+
+		sl->Einc = sl->EincS;
+		sl->Ecmp = ENV_END;
+		sl->Ecurp = SUBSTAIN;
+		break;
+
+	case 2: // SUBSTAIN
+		// Env_Substain_Next(slot_t *SL)
+		if (sl->SEG & 8)    // SSG envelope type
+		{
+			int release = sl->SEG & 1; // H (hold) bit: stop instead of restarting the attack
+
+			if ( !release )
+			{
+				// re KEY ON
+
+				// sl->Fcnt = 0;
+				// sl->ChgEnM = ~0;
+
+				sl->Ecnt = 0;
+				sl->Einc = sl->EincA;
+				sl->Ecmp = ENV_DECAY;
+				sl->Ecurp = ATTACK;
+			}
+			// The Al (alternate) bit moves into the negate position; every other SEG bit is cleared.
+			set_seg( *sl, (sl->SEG << 1) & 4 );
+
+			if ( !release )
+				break;
+		}
+		// fall through
+
+	case 3: // RELEASE
+		// Env_Release_Next
+		sl->Ecnt = ENV_END;
+		sl->Einc = 0;
+		sl->Ecmp = ENV_END + 1; // with Einc 0 the counter stays at ENV_END, below this limit
+		break;
+
+	// default: no op
+	}
+}
+
+inline void update_envelope( slot_t& sl )   // advance the envelope counter by one step
+{
+	int const limit = sl.Ecmp;              // read before Ecnt is modified
+	sl.Ecnt += sl.Einc;
+	if ( sl.Ecnt >= limit ) update_envelope_( &sl );   // limit reached: switch phase
+}
+
+template<int algo> // algo: compile-time algorithm number; func handles the values 0-7
+struct ym2612_update_chan {
+	static void func( tables_t&, channel_t&, Ym2612_GENS_Emu::sample_t*, int );
+};
+// Function pointer type with the same signature as ym2612_update_chan<algo>::func
+typedef void (*ym2612_update_chan_t)( tables_t&, channel_t&, Ym2612_GENS_Emu::sample_t*, int );
+
+template<int algo>
+void ym2612_update_chan<algo>::func( tables_t& g, channel_t& ch,
+		Ym2612_GENS_Emu::sample_t* buf, int length ) // adds `length` interleaved two-sample pairs for channel ch into buf
+{
+	int not_end = ch.SLOT [S3].Ecnt - ENV_END; // zero only when S3's Ecnt equals ENV_END
+
+	// algo is a compile-time constant, so all conditions based on it are resolved
+	// during compilation
+
+	// special cases
+	if ( algo == 7 ) // algo 7 also checks S0
+		not_end |= ch.SLOT [S0].Ecnt - ENV_END;
+
+	if ( algo >= 5 ) // algos 5-7 also check S2
+		not_end |= ch.SLOT [S2].Ecnt - ENV_END;
+
+	if ( algo >= 4 ) // algos 4-7 also check S1
+		not_end |= ch.SLOT [S1].Ecnt - ENV_END;
+
+	int CH_S0_OUT_1 = ch.S0_OUT [1]; // local copy of the stored S0 output used for feedback
+
+	int in0 = ch.SLOT [S0].Fcnt; // local copies of the four phase counters,
+	int in1 = ch.SLOT [S1].Fcnt; // written back to the slots after the loop
+	int in2 = ch.SLOT [S2].Fcnt;
+	int in3 = ch.SLOT [S3].Fcnt;
+
+	int YM2612_LFOinc = g.LFOinc;
+	int YM2612_LFOcnt = g.LFOcnt + YM2612_LFOinc; // local LFO position; g.LFOcnt is not written in this function
+
+	if ( !not_end ) // every slot checked above has Ecnt == ENV_END
+		return; // leaves before the loop: buf and all channel state stay untouched
+
+	do
+	{
+		// envelope
+		int const env_LFO = g.LFO_ENV_TAB [YM2612_LFOcnt >> LFO_LBITS & LFO_MASK];
+
+		short const* const ENV_TAB = g.ENV_TAB;
+
+	#define CALC_EN( x ) \
+		int temp##x = ENV_TAB [ch.SLOT [S##x].Ecnt >> ENV_LBITS] + ch.SLOT [S##x].TLL;  \
+		int en##x = ((temp##x ^ ch.SLOT [S##x].env_xor) + (env_LFO >> ch.SLOT [S##x].AMS)) &    \
+				((temp##x - ch.SLOT [S##x].env_max) >> 31);
+
+		CALC_EN( 0 ) // declares temp0 and en0
+		CALC_EN( 1 ) // declares temp1 and en1
+		CALC_EN( 2 ) // declares temp2 and en2
+		CALC_EN( 3 ) // declares temp3 and en3
+
+		int const* const TL_TAB = g.TL_TAB;
+
+	#define SINT( i, o ) (TL_TAB [g.SIN_TAB [(i)] + (o)])
+
+		// feedback
+		int CH_S0_OUT_0 = ch.S0_OUT [0];
+		{
+			int temp = in0 + ((CH_S0_OUT_0 + CH_S0_OUT_1) >> ch.FB); // S0 phase plus the sum of its two previous outputs shifted by FB
+			CH_S0_OUT_1 = CH_S0_OUT_0; // shift the output history by one sample
+			CH_S0_OUT_0 = SINT( (temp >> SIN_LBITS) & SIN_MASK, en0 );
+		}
+
+		int CH_OUTd; // assigned by exactly one branch below for algo values 0-7
+		if ( algo == 0 )
+		{
+			int temp = in1 + CH_S0_OUT_1;
+			temp = in2 + SINT( (temp >> SIN_LBITS) & SIN_MASK, en1 );
+			temp = in3 + SINT( (temp >> SIN_LBITS) & SIN_MASK, en2 );
+			CH_OUTd = SINT( (temp >> SIN_LBITS) & SIN_MASK, en3 );
+		}
+		else if ( algo == 1 )
+		{
+			int temp = in2 + CH_S0_OUT_1 + SINT( (in1 >> SIN_LBITS) & SIN_MASK, en1 );
+			temp = in3 + SINT( (temp >> SIN_LBITS) & SIN_MASK, en2 );
+			CH_OUTd = SINT( (temp >> SIN_LBITS) & SIN_MASK, en3 );
+		}
+		else if ( algo == 2 )
+		{
+			int temp = in2 + SINT( (in1 >> SIN_LBITS) & SIN_MASK, en1 );
+			temp = in3 + CH_S0_OUT_1 + SINT( (temp >> SIN_LBITS) & SIN_MASK, en2 );
+			CH_OUTd = SINT( (temp >> SIN_LBITS) & SIN_MASK, en3 );
+		}
+		else if ( algo == 3 )
+		{
+			int temp = in1 + CH_S0_OUT_1;
+			temp = in3 + SINT( (temp >> SIN_LBITS) & SIN_MASK, en1 ) +
+					SINT( (in2 >> SIN_LBITS) & SIN_MASK, en2 );
+			CH_OUTd = SINT( (temp >> SIN_LBITS) & SIN_MASK, en3 );
+		}
+		else if ( algo == 4 )
+		{
+			int temp = in3 + SINT( (in2 >> SIN_LBITS) & SIN_MASK, en2 );
+			CH_OUTd = SINT( (temp >> SIN_LBITS) & SIN_MASK, en3 ) +
+					SINT( ((in1 + CH_S0_OUT_1) >> SIN_LBITS) & SIN_MASK, en1 );
+			//DO_LIMIT
+		}
+		else if ( algo == 5 )
+		{
+			int temp = CH_S0_OUT_1;
+			CH_OUTd = SINT( ((in3 + temp) >> SIN_LBITS) & SIN_MASK, en3 ) +
+					SINT( ((in1 + temp) >> SIN_LBITS) & SIN_MASK, en1 ) +
+					SINT( ((in2 + temp) >> SIN_LBITS) & SIN_MASK, en2 );
+			//DO_LIMIT
+		}
+		else if ( algo == 6 )
+		{
+			CH_OUTd = SINT( (in3 >> SIN_LBITS) & SIN_MASK, en3 ) +
+					SINT( ((in1 + CH_S0_OUT_1) >> SIN_LBITS) & SIN_MASK, en1 ) +
+					SINT( (in2 >> SIN_LBITS) & SIN_MASK, en2 );
+			//DO_LIMIT
+		}
+		else if ( algo == 7 )
+		{
+			CH_OUTd = SINT( (in3 >> SIN_LBITS) & SIN_MASK, en3 ) +
+					SINT( (in1 >> SIN_LBITS) & SIN_MASK, en1 ) +
+					SINT( (in2 >> SIN_LBITS) & SIN_MASK, en2 ) + CH_S0_OUT_1;
+			//DO_LIMIT
+		}
+
+		CH_OUTd >>= MAX_OUT_BITS - output_bits + 2; // scale the channel output down before mixing
+
+		// update phase
+		unsigned freq_LFO = ((g.LFO_FREQ_TAB [YM2612_LFOcnt >> LFO_LBITS & LFO_MASK] *
+				ch.FMS) >> (LFO_HBITS - 1 + 1)) + (1L << (LFO_FMS_LBITS - 1));
+		YM2612_LFOcnt += YM2612_LFOinc; // advance the local LFO position for the next sample
+		in0 += (ch.SLOT [S0].Finc * freq_LFO) >> (LFO_FMS_LBITS - 1);
+		in1 += (ch.SLOT [S1].Finc * freq_LFO) >> (LFO_FMS_LBITS - 1);
+		in2 += (ch.SLOT [S2].Finc * freq_LFO) >> (LFO_FMS_LBITS - 1);
+		in3 += (ch.SLOT [S3].Finc * freq_LFO) >> (LFO_FMS_LBITS - 1);
+
+		int t0 = buf [0] + (CH_OUTd & ch.LEFT); // mix into the existing buffer contents, masked by LEFT
+		int t1 = buf [1] + (CH_OUTd & ch.RIGHT); // same for the second sample of the pair, masked by RIGHT
+
+		update_envelope( ch.SLOT [0] );
+		update_envelope( ch.SLOT [1] );
+		update_envelope( ch.SLOT [2] );
+		update_envelope( ch.SLOT [3] );
+
+		ch.S0_OUT [0] = CH_S0_OUT_0;
+		buf [0] = t0; // int sum is stored into sample_t without clamping
+		buf [1] = t1;
+		buf += 2; // next interleaved pair
+	}
+	while ( --length ); // pre-decrement test: length must be >= 1 on entry
+
+	ch.S0_OUT [1] = CH_S0_OUT_1;
+
+	ch.SLOT [S0].Fcnt = in0; // write the advanced phase counters back to the slots
+	ch.SLOT [S1].Fcnt = in1;
+	ch.SLOT [S2].Fcnt = in2;
+	ch.SLOT [S3].Fcnt = in3;
+}
+
+static const ym2612_update_chan_t UPDATE_CHAN [8] = { // one instantiation per algo 0-7; run() indexes this by the channel's ALGO
+	&ym2612_update_chan<0>::func,
+	&ym2612_update_chan<1>::func,
+	&ym2612_update_chan<2>::func,
+	&ym2612_update_chan<3>::func,
+	&ym2612_update_chan<4>::func,
+	&ym2612_update_chan<5>::func,
+	&ym2612_update_chan<6>::func,
+	&ym2612_update_chan<7>::func
+};
+
+void Ym2612_GENS_Impl::run_timer( int length ) // advance timers A and B by `length` samples
+{
+	int const step = 6; // timers are advanced in chunks of at most 6 samples
+	int remain = length;
+	do
+	{
+		int n = step;
+		if ( n > remain )
+			n = remain; // final chunk may be shorter than step
+		remain -= n;
+
+		long i = n * YM2612.TimerBase; // amount subtracted from each enabled timer counter for this chunk
+		if (YM2612.Mode & 1)                            // Timer A ON ?
+		{
+	//      if ((YM2612.TimerAcnt -= 14073) <= 0)       // 13879=NTSC (old: 14475=NTSC  14586=PAL)
+			if ((YM2612.TimerAcnt -= i) <= 0)
+			{
+				// timer a overflow
+
+				YM2612.Status |= (YM2612.Mode & 0x04) >> 2; // Mode bit 2 is copied into Status bit 0
+				YM2612.TimerAcnt += YM2612.TimerAL; // reload by adding TimerAL to the expired counter
+
+				if (YM2612.Mode & 0x80) // Mode bit 7 set: key on all four slots of CHANNEL [2]
+				{
+					KEY_ON( YM2612.CHANNEL [2], 0 );
+					KEY_ON( YM2612.CHANNEL [2], 1 );
+					KEY_ON( YM2612.CHANNEL [2], 2 );
+					KEY_ON( YM2612.CHANNEL [2], 3 );
+				}
+			}
+		}
+
+		if (YM2612.Mode & 2)                            // Timer B ON ?
+		{
+	//      if ((YM2612.TimerBcnt -= 14073) <= 0)       // 13879=NTSC (old: 14475=NTSC  14586=PAL)
+			if ((YM2612.TimerBcnt -= i) <= 0)
+			{
+				// timer b overflow
+				YM2612.Status |= (YM2612.Mode & 0x08) >> 2; // Mode bit 3 is copied into Status bit 1
+				YM2612.TimerBcnt += YM2612.TimerBL; // reload by adding TimerBL to the expired counter
+			}
+		}
+	}
+	while ( remain > 0 ); // do-while: the body runs once even if length <= 0
+}
+
+void Ym2612_GENS_Impl::run( int pair_count, Ym2612_GENS_Emu::sample_t* out ) // mix pair_count sample pairs from all active channels into out
+{
+	if ( pair_count <= 0 ) // nothing to do; also protects the channel loops, which need a count >= 1
+		return;
+
+	if ( YM2612.Mode & 3 ) // at least one of timer A / timer B is enabled
+		run_timer( pair_count );
+
+	// Update the frequency counter steps if they have been modified
+
+	for ( int chi = 0; chi < channel_count; chi++ )
+	{
+		channel_t& ch = YM2612.CHANNEL [chi];
+		if ( ch.SLOT [0].Finc != -1 ) // only channels whose slot 0 Finc is -1 are recomputed
+			continue;
+
+		int i2 = 0; // index into FNUM/FOCT/KC; stays 0 unless the per-slot mode below applies
+		if ( chi == 2 && (YM2612.Mode & 0x40) ) // channel index 2 with Mode bit 6 set
+			i2 = 2;
+
+		for ( int i = 0; i < 4; i++ )
+		{
+			// static int seq [4] = { 2, 1, 3, 0 };
+			// if ( i2 ) i2 = seq [i];
+
+			slot_t& sl = ch.SLOT [i];
+			int finc = g.FINC_TAB [ch.FNUM [i2]] >> (7 - ch.FOCT [i2]);
+			int ksr = ch.KC [i2] >> sl.KSR_S;   // keycode attenuation
+			sl.Finc = (finc + sl.DT [ch.KC [i2]]) * sl.MUL;
+			if (sl.KSR != ksr)          // if the KSR has changed, then
+			{                       // the various envelope rates are updated
+				sl.KSR = ksr;
+
+				sl.EincA = sl.AR [ksr];
+				sl.EincD = sl.DR [ksr];
+				sl.EincS = sl.SR [ksr];
+				sl.EincR = sl.RR [ksr];
+
+				if (sl.Ecurp == ATTACK) // also refresh the active increment for the current envelope phase
+				{
+					sl.Einc = sl.EincA;
+				}
+				else if (sl.Ecurp == DECAY)
+				{
+					sl.Einc = sl.EincD;
+				}
+				else if (sl.Ecnt < ENV_END) // sustain/release increments apply only before ENV_END is reached
+				{
+					if (sl.Ecurp == SUBSTAIN)
+						sl.Einc = sl.EincS;
+					else if (sl.Ecurp == RELEASE)
+						sl.Einc = sl.EincR;
+				}
+			}
+
+			if ( i2 )
+				i2 = (i2 ^ 2) ^ (i2 >> 1); // steps i2 through 2 -> 1 -> 3 -> 0 across the four slots
+		}
+	}
+
+	for ( int i = 0; i < channel_count; i++ )
+	{
+		if ( !(mute_mask & (1 << i)) && (i != 5 || !YM2612.DAC) ) // skip muted channels, and channel index 5 while DAC is nonzero
+			UPDATE_CHAN [YM2612.CHANNEL [i].ALGO]( g, YM2612.CHANNEL [i], out, pair_count );
+	}
+
+	g.LFOcnt += g.LFOinc * pair_count; // advance the shared LFO counter once for the whole run
+}
+
+void Ym2612_GENS_Emu::run( int pair_count, sample_t* out ) { impl->run( pair_count, out ); } // forwards to the implementation; impl is dereferenced unchecked
diff --git a/libraries/game-music-emu/gme/Ym2612_GENS.h b/libraries/game-music-emu/gme/Ym2612_GENS.h
new file mode 100644
index 000000000..4cb2e8ae3
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ym2612_GENS.h
@@ -0,0 +1,38 @@
+// YM2612 FM sound chip emulator interface
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef YM2612_EMU_H
+#define YM2612_EMU_H
+
+struct Ym2612_GENS_Impl;
+
+class Ym2612_GENS_Emu  {
+	Ym2612_GENS_Impl* impl; // opaque implementation pointer; 0 right after construction
+public:
+	Ym2612_GENS_Emu() { impl = 0; }
+	~Ym2612_GENS_Emu();
+
+	// Set output sample rate and chip clock rates, in Hz. Returns non-zero
+	// if error.
+	const char* set_rate( double sample_rate, double clock_rate );
+
+	// Reset to power-up state
+	void reset();
+
+	// Mute voice n if bit n (1 << n) of mask is set
+	enum { channel_count = 6 };
+	void mute_voices( int mask );
+
+	// Write addr to register 0 then data to register 1
+	void write0( int addr, int data );
+
+	// Write addr to register 2 then data to register 3
+	void write1( int addr, int data );
+
+	// Run and add pair_count samples into current output buffer contents
+	typedef short sample_t; // element type of the output buffer
+	enum { out_chan_count = 2 }; // stereo
+	void run( int pair_count, sample_t* out ); // out is interleaved: two samples per pair
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Ym2612_MAME.cpp b/libraries/game-music-emu/gme/Ym2612_MAME.cpp
new file mode 100644
index 000000000..524dab55a
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ym2612_MAME.cpp
@@ -0,0 +1,3108 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+// Based on Mame YM2612 ym2612.c
+
+#include "Ym2612_MAME.h"
+
+/*
+**
+** File: fm2612.c -- software implementation of Yamaha YM2612 FM sound generator
+** Split from fm.c to keep 2612 fixes from infecting other OPN chips
+**
+** Copyright Jarek Burczynski (bujar at mame dot net)
+** Copyright Tatsuyuki Satoh , MultiArcadeMachineEmulator development
+**
+** Version 1.5.1 (Genesis Plus GX ym2612.c rev. 368)
+**
+*/
+
+/*
+** History:
+**
+** 2006~2012  Eke-Eke (Genesis Plus GX):
+** Huge thanks to Nemesis, lot of those fixes came from his tests on Sega Genesis hardware
+** More information at http://gendev.spritesmind.net/forum/viewtopic.php?t=386
+**
+**  TODO:
+**
+**  - core documentation
+**  - BUSY flag support
+**
+**  CHANGELOG:
+**
+** 26-09-2017 Eke-Eke (Genesis Plus GX):
+**  - fixed EG counter loopback behavior (verified on YM3438 die)
+**  - reverted changes to EG rates 2-7 increment values
+**
+** xx-xx-xxxx
+**  - fixed LFO implementation:
+**      .added support for CH3 special mode: fixes various sound effects (birds in Warlock, bug sound in Aladdin...)
+**      .inverted LFO AM waveform: fixes Spider-Man & Venom : Separation Anxiety (intro), California Games (surfing event)
+**      .improved LFO timing accuracy: now updated AFTER sample output, like EG/PG updates, and without any precision loss anymore.
+**  - improved internal timers emulation
+**  - adjusted lowest EG rates increment values
+**  - fixed Attack Rate not being updated in some specific cases (Batman & Robin intro)
+**  - fixed EG behavior when Attack Rate is maximal
+**  - fixed EG behavior when SL=0 (Mega Turrican tracks 03,09...) or/and Key ON occurs at minimal attenuation
+**  - implemented EG output immediate changes on register writes
+**  - fixed YM2612 initial values (after the reset): fixes missing intro in B.O.B
+**  - implemented Detune overflow (Ariel, Comix Zone, Shaq Fu, Spiderman & many other games using GEMS sound engine)
+**  - implemented accurate CSM mode emulation
+**  - implemented accurate SSG-EG emulation (Asterix, Beavis&Butthead, Bubba'n Stix & many other games)
+**  - implemented accurate address/data ports behavior
+**
+** 06-23-2007 Zsolt Vasvari:
+**  - changed the timing not to require the use of floating point calculations
+**
+** 03-08-2003 Jarek Burczynski:
+**  - fixed YM2608 initial values (after the reset)
+**  - fixed flag and irqmask handling (YM2608)
+**  - fixed BUFRDY flag handling (YM2608)
+**
+** 14-06-2003 Jarek Burczynski:
+**  - implemented all of the YM2608 status register flags
+**  - implemented support for external memory read/write via YM2608
+**  - implemented support for deltat memory limit register in YM2608 emulation
+**
+** 22-05-2003 Jarek Burczynski:
+**  - fixed LFO PM calculations (copy&paste bugfix)
+**
+** 08-05-2003 Jarek Burczynski:
+**  - fixed SSG support
+**
+** 22-04-2003 Jarek Burczynski:
+**  - implemented 100% correct LFO generator (verified on real YM2610 and YM2608)
+**
+** 15-04-2003 Jarek Burczynski:
+**  - added support for YM2608's register 0x110 - status mask
+**
+** 01-12-2002 Jarek Burczynski:
+**  - fixed register addressing in YM2608, YM2610, YM2610B chips. (verified on real YM2608)
+**    The addressing patch used for early Neo-Geo games can be removed now.
+**
+** 26-11-2002 Jarek Burczynski, Nicola Salmoria:
+**  - recreated YM2608 ADPCM ROM using data from real YM2608's output which leads to:
+**  - added emulation of YM2608 drums.
+**  - output of YM2608 is two times lower now - same as YM2610 (verified on real YM2608)
+**
+** 16-08-2002 Jarek Burczynski:
+**  - binary exact Envelope Generator (verified on real YM2203);
+**    identical to YM2151
+**  - corrected 'off by one' error in feedback calculations (when feedback is off)
+**  - corrected connection (algorithm) calculation (verified on real YM2203 and YM2610)
+**
+** 18-12-2001 Jarek Burczynski:
+**  - added SSG-EG support (verified on real YM2203)
+**
+** 12-08-2001 Jarek Burczynski:
+**  - corrected sin_tab and tl_tab data (verified on real chip)
+**  - corrected feedback calculations (verified on real chip)
+**  - corrected phase generator calculations (verified on real chip)
+**  - corrected envelope generator calculations (verified on real chip)
+**  - corrected FM volume level (YM2610 and YM2610B).
+**  - changed YMxxxUpdateOne() functions (YM2203, YM2608, YM2610, YM2610B, YM2612) :
+**    this was needed to calculate YM2610 FM channels output correctly.
+**    (Each FM channel is calculated as in other chips, but the output of the channel
+**    gets shifted right by one *before* sending to accumulator. That was impossible to do
+**    with previous implementation).
+**
+** 23-07-2001 Jarek Burczynski, Nicola Salmoria:
+**  - corrected YM2610 ADPCM type A algorithm and tables (verified on real chip)
+**
+** 11-06-2001 Jarek Burczynski:
+**  - corrected end of sample bug in ADPCMA_calc_cha().
+**    Real YM2610 checks for equality between current and end addresses (only 20 LSB bits).
+**
+** 08-12-98 hiro-shi:
+** rename ADPCMA -> ADPCMB, ADPCMB -> ADPCMA
+** move ROM limit check.(CALC_CH? -> 2610Write1/2)
+** test program (ADPCMB_TEST)
+** move ADPCM A/B end check.
+** ADPCMB repeat flag(no check)
+** change ADPCM volume rate (8->16) (32->48).
+**
+** 09-12-98 hiro-shi:
+** change ADPCM volume. (8->16, 48->64)
+** replace ym2610 ch0/3 (YM-2610B)
+** change ADPCM_SHIFT (10->8) missing bank change 0x4000-0xffff.
+** add ADPCM_SHIFT_MASK
+** change ADPCMA_DECODE_MIN/MAX.
+*/
+
+/************************************************************************/
+/*    comment of hiro-shi(Hiromitsu Shioya)                             */
+/*    YM2610(B) = OPN-B                                                 */
+/*    YM2610  : PSG:3ch FM:4ch ADPCM(18.5KHz):6ch DeltaT ADPCM:1ch      */
+/*    YM2610B : PSG:3ch FM:6ch ADPCM(18.5KHz):6ch DeltaT ADPCM:1ch      */
+/************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>	/* for memset */
+#include <stddef.h>	/* for NULL */
+#include <math.h>
+#include <stdint.h>
+
+namespace Ym2612_MameImpl
+{
+
+/* ---- mamedef - begin ---- */
+/* typedefs to use MAME's (U)INTxx types (copied from MAME\src\osd\osdcomm.h) */
+/* 8-bit values */
+typedef unsigned char						UINT8;
+typedef signed char 						INT8;
+
+/* 16-bit values */
+typedef unsigned short						UINT16;
+typedef signed short						INT16;
+
+/* 32-bit values */
+#ifndef _WINDOWS_H
+typedef unsigned int						UINT32;
+typedef signed int							INT32;
+#endif
+
+/* 64-bit values */
+#ifndef _WINDOWS_H
+#ifdef _MSC_VER
+typedef signed __int64						INT64;
+typedef unsigned __int64					UINT64;
+#else
+__extension__ typedef unsigned long long	UINT64;
+__extension__ typedef signed long long		INT64;
+#endif
+#endif
+
+/* offsets and addresses are 32-bit (for now...) */
+typedef UINT32	offs_t;
+
+/* stream_sample_t is used to represent a single sample in a sound stream */
+typedef INT16 stream_sample_t;
+
+#if defined(VGM_BIG_ENDIAN)
+#define BYTE_XOR_BE(x)	 (x)
+#elif defined(VGM_LITTLE_ENDIAN)
+#define BYTE_XOR_BE(x)	((x) ^ 0x01)
+#else
+/* don't define BYTE_XOR_BE so that it throws an error when compiling */
+#endif
+
+#if defined(_MSC_VER)
+//#define INLINE	static __forceinline
+#define INLINE	static __inline
+#elif defined(__GNUC__)
+#define INLINE	static __inline__
+#else
+#define INLINE	static inline
+#endif
+
+#ifndef M_PI
+#define M_PI	3.14159265358979323846
+#endif
+
+#ifdef _DEBUG
+#define logerror	printf
+#else
+#define logerror
+#endif
+
+typedef void (*SRATE_CALLBACK)(void*, UINT32);
+/* ---- mamedef - end ---- */
+
+/* --- select emulation chips --- */
+/*
+#define BUILD_YM2203  (HAS_YM2203)		// build YM2203(OPN)   emulator
+#define BUILD_YM2608  (HAS_YM2608)		// build YM2608(OPNA)  emulator
+#define BUILD_YM2610  (HAS_YM2610)		// build YM2610(OPNB)  emulator
+#define BUILD_YM2610B (HAS_YM2610B)		// build YM2610B(OPNB?)emulator
+#define BUILD_YM2612  (HAS_YM2612)		// build YM2612(OPN2)  emulator
+#define BUILD_YM3438  (HAS_YM3438)		// build YM3438(OPN) emulator
+*/
+#define BUILD_YM2203  0
+#define BUILD_YM2608  0
+#define BUILD_YM2610  0
+#define BUILD_YM2610B 0
+#define BUILD_YM2612  1
+#define BUILD_YM3438  0
+
+#define FM_BUSY_FLAG_SUPPORT 0
+
+/* select bit size of output : 8 or 16 */
+#define FM_SAMPLE_BITS 16
+
+/* select timer system internal or external */
+#define FM_INTERNAL_TIMER 1
+
+/* --- speedup optimize --- */
+/* busy flag emulation; the definition of FM_GET_TIME_NOW() is necessary. */
+/* #define FM_BUSY_FLAG_SUPPORT 1 */
+
+/* --- external SSG(YM2149/AY-3-8910)emulator interface port */
+/* used by YM2203,YM2608,and YM2610 */
+typedef struct _ssg_callbacks ssg_callbacks;
+struct _ssg_callbacks
+{
+	void (*set_clock)(void *param, int clock);
+	void (*write)(void *param, int address, int data);
+	int (*read)(void *param);
+	void (*reset)(void *param);
+};
+
+/* --- external callback functions for realtime update --- */
+
+#if FM_BUSY_FLAG_SUPPORT
+#define TIME_TYPE					attotime
+#define UNDEFINED_TIME				attotime_zero
+#define FM_GET_TIME_NOW(machine)			timer_get_time(machine)
+#define ADD_TIMES(t1, t2)   		attotime_add((t1), (t2))
+#define COMPARE_TIMES(t1, t2)		attotime_compare((t1), (t2))
+#define MULTIPLY_TIME_BY_INT(t,i)	attotime_mul(t, i)
+#endif
+
+/* compiler dependence */
+#if 0
+#ifndef OSD_CPU_H
+#define OSD_CPU_H
+typedef unsigned char	UINT8;   /* unsigned  8bit */
+typedef unsigned short	UINT16;  /* unsigned 16bit */
+typedef unsigned int	UINT32;  /* unsigned 32bit */
+typedef signed char		INT8;    /* signed  8bit   */
+typedef signed short	INT16;   /* signed 16bit   */
+typedef signed int		INT32;   /* signed 32bit   */
+#endif /* OSD_CPU_H */
+#endif
+
+typedef stream_sample_t FMSAMPLE;
+/*
+#if (FM_SAMPLE_BITS==16)
+typedef INT16 FMSAMPLE;
+#endif
+#if (FM_SAMPLE_BITS==8)
+typedef unsigned char  FMSAMPLE;
+#endif
+*/
+
+typedef void (*FM_TIMERHANDLER)(void *param,int c,int cnt,int clock);
+typedef void (*FM_IRQHANDLER)(void *param,int irq);
+/* FM_TIMERHANDLER : Stop or Start timer         */
+/* int n          = chip number                  */
+/* int c          = Channel 0=TimerA,1=TimerB    */
+/* int count      = timer count (0=stop)         */
+/* double stepTime = step time of one count (sec.)*/
+
+/* FM_IRQHANDLER : IRQ level changing sense      */
+/* int n       = chip number                     */
+/* int irq     = IRQ level 0=OFF,1=ON            */
+
+/**
+ * @brief Initialize chip and return the instance
+ * @param param Unused, keep NULL
+ * @param baseclock YM2612 clock
+ * @param rate Output sample rate
+ * @param TimerHandler Keep NULL
+ * @param IRQHandler Keep NULL
+ * @return Chip instance or NULL on any error
+ */
+static void * ym2612_init(void *param, int baseclock, int rate,
+               FM_TIMERHANDLER TimerHandler,FM_IRQHANDLER IRQHandler);
+/**
+ * @brief Free chip instance
+ * @param chip Chip instance
+ */
+static void ym2612_shutdown(void *chip);
+/**
+ * @brief Reset state of the chip
+ * @param chip Chip instance
+ */
+static void ym2612_reset_chip(void *chip);
+/**
+ * @brief Generate stereo output of specified length
+ * @param chip Chip instance
+ * @param buffer Output sound buffer
+ * @param frames Output buffer size in frames (one frame - two array entries of the buffer)
+ * @param mix 0 - overwrite buffer data, 1 - mix output data with a content of the buffer
+ */
+static void ym2612_generate(void *chip, FMSAMPLE *buffer, int frames, int mix);
+#define ym2612_update_one(chip, buffer, length) ym2612_generate(chip, buffer, length, 0)
+
+/**
+ * @brief Single-Sample generation prepare
+ * @param chip Chip instance
+ */
+static void ym2612_pre_generate(void *chip);
+/**
+ * @brief Generate a single stereo PCM frame at the native sample rate of 53267 Hz
+ * @param chip Chip instance
+ * @param buffer One stereo PCM frame
+ */
+static void ym2612_generate_one_native(void *chip, FMSAMPLE buffer[2]);
+
+/* void ym2612_post_generate(void *chip, int length); */
+
+static int ym2612_write(void *chip, int a,unsigned char v);
+#if 0
+static unsigned char ym2612_read(void *chip,int a);
+static int ym2612_timer_over(void *chip, int c );
+#endif
+
+#ifdef __STATE_H__
+static void ym2612_postload(void *chip);
+#endif
+
+static void ym2612_set_mutemask(void *chip, UINT32 MuteMask);
+#if 0
+static void ym2612_setoptions(UINT8 Flags);
+#endif
+
+
+static stream_sample_t *DUMMYBUF = NULL;
+
+/* shared function building option */
+#define BUILD_OPN (BUILD_YM2203||BUILD_YM2608||BUILD_YM2610||BUILD_YM2610B||BUILD_YM2612||BUILD_YM3438)
+#define BUILD_OPN_PRESCALER (BUILD_YM2203||BUILD_YM2608)
+
+#define RSM_ENABLE 0
+#define RSM_FRAC 10
+
+/* globals */
+#define TYPE_SSG    0x01    /* SSG support          */
+#define TYPE_LFOPAN 0x02    /* OPN type LFO and PAN */
+#define TYPE_6CH    0x04    /* FM 6CH / 3CH         */
+#define TYPE_DAC    0x08    /* YM2612's DAC device  */
+#define TYPE_ADPCM  0x10    /* two ADPCM units      */
+#define TYPE_2610   0x20    /* bogus flag to differentiate 2608 from 2610 */
+
+
+#define TYPE_YM2203 (TYPE_SSG)
+#define TYPE_YM2608 (TYPE_SSG |TYPE_LFOPAN |TYPE_6CH |TYPE_ADPCM)
+#define TYPE_YM2610 (TYPE_SSG |TYPE_LFOPAN |TYPE_6CH |TYPE_ADPCM |TYPE_2610)
+#define TYPE_YM2612 (TYPE_DAC |TYPE_LFOPAN |TYPE_6CH)
+
+
+/* globals */
+#define FREQ_SH			16  /* 16.16 fixed point (frequency calculations) */
+#define EG_SH			16  /* 16.16 fixed point (envelope generator timing) */
+#define LFO_SH			24  /*  8.24 fixed point (LFO calculations)       */
+#define TIMER_SH		16  /* 16.16 fixed point (timers calculations)    */
+
+#define FREQ_MASK		((1<<FREQ_SH)-1)
+
+#define MAXOUT    (+32767)
+#define MINOUT    (-32768)
+
+/* envelope generator */
+#define ENV_BITS		10
+#define ENV_LEN			(1<<ENV_BITS)
+#define ENV_STEP		(128.0/ENV_LEN)
+
+#define MAX_ATT_INDEX	(ENV_LEN-1) /* 1023 */
+#define MIN_ATT_INDEX	(0)			/* 0 */
+
+#define EG_ATT			4
+#define EG_DEC			3
+#define EG_SUS			2
+#define EG_REL			1
+#define EG_OFF			0
+
+/* operator unit */
+#define SIN_BITS		10
+#define SIN_LEN			(1<<SIN_BITS)
+#define SIN_MASK		(SIN_LEN-1)
+
+#define TL_RES_LEN		(256) /* 8 bits addressing (real chip) */
+
+/*  TL_TAB_LEN is calculated as:
+*   13 - sinus amplitude bits     (Y axis)
+*   2  - sinus sign bit           (Y axis)
+*   TL_RES_LEN - sinus resolution (X axis)
+*/
+#define TL_TAB_LEN (13*2*TL_RES_LEN)
+static signed int tl_tab[TL_TAB_LEN];
+
+#define ENV_QUIET		(TL_TAB_LEN>>3)
+
+/* sin waveform table in 'decibel' scale */
+static unsigned int sin_tab[SIN_LEN];
+
+/* sustain level table (3dB per step) */
+/* bit0, bit1, bit2, bit3, bit4, bit5, bit6 */
+/* 1,    2,    4,    8,    16,   32,   64   (value)*/
+/* 0.75, 1.5,  3,    6,    12,   24,   48   (dB)*/
+
+/* 0 - 15: 0, 3, 6, 9,12,15,18,21,24,27,30,33,36,39,42,93 (dB)*/
+/* attenuation value (10 bits) = (SL << 2) << 3 */
+#define SC(db) (UINT32) ( db * (4.0/ENV_STEP) ) /* ENV_STEP is 128.0/ENV_LEN = 0.125, so SC(db) = db * 32 */
+static const UINT32 sl_table[16]={ /* 16 entries; the last one uses SC(31) rather than SC(15) */
+ SC( 0),SC( 1),SC( 2),SC(3 ),SC(4 ),SC(5 ),SC(6 ),SC( 7),
+ SC( 8),SC( 9),SC(10),SC(11),SC(12),SC(13),SC(14),SC(31)
+};
+#undef SC
+
+
+#define RATE_STEPS (8)
+static const UINT8 eg_inc[19*RATE_STEPS]={
+
+/*cycle:0 1  2 3  4 5  6 7*/
+
+/* 0 */ 0,1, 0,1, 0,1, 0,1, /* rates 00..11 0 (increment by 0 or 1) */
+/* 1 */ 0,1, 0,1, 1,1, 0,1, /* rates 00..11 1 */
+/* 2 */ 0,1, 1,1, 0,1, 1,1, /* rates 00..11 2 */
+/* 3 */ 0,1, 1,1, 1,1, 1,1, /* rates 00..11 3 */
+
+/* 4 */ 1,1, 1,1, 1,1, 1,1, /* rate 12 0 (increment by 1) */
+/* 5 */ 1,1, 1,2, 1,1, 1,2, /* rate 12 1 */
+/* 6 */ 1,2, 1,2, 1,2, 1,2, /* rate 12 2 */
+/* 7 */ 1,2, 2,2, 1,2, 2,2, /* rate 12 3 */
+
+/* 8 */ 2,2, 2,2, 2,2, 2,2, /* rate 13 0 (increment by 2) */
+/* 9 */ 2,2, 2,4, 2,2, 2,4, /* rate 13 1 */
+/*10 */ 2,4, 2,4, 2,4, 2,4, /* rate 13 2 */
+/*11 */ 2,4, 4,4, 2,4, 4,4, /* rate 13 3 */
+
+/*12 */ 4,4, 4,4, 4,4, 4,4, /* rate 14 0 (increment by 4) */
+/*13 */ 4,4, 4,8, 4,4, 4,8, /* rate 14 1 */
+/*14 */ 4,8, 4,8, 4,8, 4,8, /* rate 14 2 */
+/*15 */ 4,8, 8,8, 4,8, 8,8, /* rate 14 3 */
+
+/*16 */ 8,8, 8,8, 8,8, 8,8, /* rates 15 0, 15 1, 15 2, 15 3 (increment by 8) */
+/*17 */ 16,16,16,16,16,16,16,16, /* rates 15 2, 15 3 for attack */
+/*18 */ 0,0, 0,0, 0,0, 0,0, /* infinity rates for attack and decay(s) */
+};
+
+
+#define O(a) (a*RATE_STEPS)
+
+/*note that there is no O(17) in this table - it's directly in the code */
+static const UINT8 eg_rate_select2612[32+64+32]={  /* Envelope Generator rates (32 + 64 rates + 32 RKS) */
+/* 32 infinite time rates (same as Rate 0) */
+O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18),
+O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18),
+O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18),
+O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18),
+
+/* rates 00-11 */
+/*
+O( 0),O( 1),O( 2),O( 3),
+O( 0),O( 1),O( 2),O( 3),
+*/
+O(18),O(18),O( 2),O( 3),    /* from Nemesis's tests on real YM2612 hardware */
+O( 0),O( 1),O( 2),O( 2),    /* Nemesis's tests */
+
+O( 0),O( 1),O( 2),O( 3),
+O( 0),O( 1),O( 2),O( 3),
+O( 0),O( 1),O( 2),O( 3),
+O( 0),O( 1),O( 2),O( 3),
+O( 0),O( 1),O( 2),O( 3),
+O( 0),O( 1),O( 2),O( 3),
+O( 0),O( 1),O( 2),O( 3),
+O( 0),O( 1),O( 2),O( 3),
+O( 0),O( 1),O( 2),O( 3),
+O( 0),O( 1),O( 2),O( 3),
+
+/* rate 12 */
+O( 4),O( 5),O( 6),O( 7),
+
+/* rate 13 */
+O( 8),O( 9),O(10),O(11),
+
+/* rate 14 */
+O(12),O(13),O(14),O(15),
+
+/* rate 15 */
+O(16),O(16),O(16),O(16),
+
+/* 32 dummy rates (same as 15 3) */
+O(16),O(16),O(16),O(16),O(16),O(16),O(16),O(16),
+O(16),O(16),O(16),O(16),O(16),O(16),O(16),O(16),
+O(16),O(16),O(16),O(16),O(16),O(16),O(16),O(16),
+O(16),O(16),O(16),O(16),O(16),O(16),O(16),O(16)
+
+};
+#undef O
+
+/*rate  0,    1,    2,   3,   4,   5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15*/
+/*shift 11,   10,   9,   8,   7,   6,  5,  4,  3,  2, 1,  0,  0,  0,  0,  0 */
+/*mask  2047, 1023, 511, 255, 127, 63, 31, 15, 7,  3, 1,  0,  0,  0,  0,  0 */
+
+#define O(a) (a*1)
+static const UINT8 eg_rate_shift[32+64+32]={  /* Envelope Generator counter shifts (32 + 64 rates + 32 RKS) */
+/* 32 infinite time rates */
+/* O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0),
+O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0),
+O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0),
+O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0), */
+
+/* fixed (should be the same as rate 0, even if it makes no difference since increment value is 0 for these rates) */
+O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11),
+O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11),
+O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11),
+O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11),
+
+/* rates 00-11 */
+O(11),O(11),O(11),O(11),
+O(10),O(10),O(10),O(10),
+O( 9),O( 9),O( 9),O( 9),
+O( 8),O( 8),O( 8),O( 8),
+O( 7),O( 7),O( 7),O( 7),
+O( 6),O( 6),O( 6),O( 6),
+O( 5),O( 5),O( 5),O( 5),
+O( 4),O( 4),O( 4),O( 4),
+O( 3),O( 3),O( 3),O( 3),
+O( 2),O( 2),O( 2),O( 2),
+O( 1),O( 1),O( 1),O( 1),
+O( 0),O( 0),O( 0),O( 0),
+
+/* rate 12 */
+O( 0),O( 0),O( 0),O( 0),
+
+/* rate 13 */
+O( 0),O( 0),O( 0),O( 0),
+
+/* rate 14 */
+O( 0),O( 0),O( 0),O( 0),
+
+/* rate 15 */
+O( 0),O( 0),O( 0),O( 0),
+
+/* 32 dummy rates (same as 15 3) */
+O( 0),O( 0),O( 0),O( 0),O( 0),O( 0),O( 0),O( 0),
+O( 0),O( 0),O( 0),O( 0),O( 0),O( 0),O( 0),O( 0),
+O( 0),O( 0),O( 0),O( 0),O( 0),O( 0),O( 0),O( 0),
+O( 0),O( 0),O( 0),O( 0),O( 0),O( 0),O( 0),O( 0)
+
+};
+#undef O
+
+static const UINT8 dt_tab[4 * 32]={
+/* this is YM2151 and YM2612 phase increment data (in 10.10 fixed point format)*/
+/* FD=0 */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/* FD=1 */
+	0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
+	2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 8, 8, 8, 8,
+/* FD=2 */
+	1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5,
+	5, 6, 6, 7, 8, 8, 9,10,11,12,13,14,16,16,16,16,
+/* FD=3 */
+	2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7,
+	8 , 8, 9,10,11,12,13,14,16,17,19,20,22,22,22,22
+};
+
+
+/* OPN key frequency number -> key code follow table */
+/* fnum higher 4bit -> keycode lower 2bit */
+static const UINT8 opn_fktable[16] = {0,0,0,0,0,0,0,1,2,3,3,3,3,3,3,3};
+
+
+/* 8 LFO speed parameters */
+/* each value represents number of samples that one LFO level will last for */
+static const UINT32 lfo_samples_per_step[8] = {108, 77, 71, 67, 62, 44, 8, 5};
+
+
+
+/*There are 4 different LFO AM depths available, they are:
+  0 dB, 1.4 dB, 5.9 dB, 11.8 dB
+  Here is how it is generated (in EG steps):
+
+  11.8 dB = 0, 2, 4, 6, 8, 10,12,14,16...126,126,124,122,120,118,....4,2,0
+   5.9 dB = 0, 1, 2, 3, 4, 5, 6, 7, 8....63, 63, 62, 61, 60, 59,.....2,1,0
+   1.4 dB = 0, 0, 0, 0, 1, 1, 1, 1, 2,...15, 15, 15, 15, 14, 14,.....0,0,0
+
+  (1.4 dB is losing precision as you can see)
+
+  It's implemented as generator from 0..126 with step 2 then a shift
+  right N times, where N is:
+    8 for 0 dB
+    3 for 1.4 dB
+    1 for 5.9 dB
+    0 for 11.8 dB
+*/
+static const UINT8 lfo_ams_depth_shift[4] = {8, 3, 1, 0};
+
+
+
+/*There are 8 different LFO PM depths available, they are:
+  0, 3.4, 6.7, 10, 14, 20, 40, 80 (cents)
+
+  Modulation level at each depth depends on F-NUMBER bits: 4,5,6,7,8,9,10
+  (bits 8,9,10 = FNUM MSB from OCT/FNUM register)
+
+  Here we store only first quarter (positive one) of full waveform.
+  Full table (lfo_pm_table) containing all 128 waveforms is built
+  at run (init) time.
+
+  One value in table below represents 4 (four) basic LFO steps
+  (1 PM step = 4 AM steps).
+
+  For example:
+   at LFO SPEED=0 (which is 108 samples per basic LFO step)
+   one value from "lfo_pm_output" table lasts for 432 consecutive
+   samples (4*108=432) and one full LFO waveform cycle lasts for 13824
+   samples (32*432=13824; 32 because we store only a quarter of whole
+            waveform in the table below)
+*/
+static const UINT8 lfo_pm_output[7*8][8]={ /* 7 bits meaningful (of F-NUMBER), 8 LFO output levels per one depth (out of 32), 8 LFO depths */
+/* FNUM BIT 4: 000 0001xxxx */
+/* DEPTH 0 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 1 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 2 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 3 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 4 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 5 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 6 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 7 */ {0,   0,   0,   0,   1,   1,   1,   1},
+
+/* FNUM BIT 5: 000 0010xxxx */
+/* DEPTH 0 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 1 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 2 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 3 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 4 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 5 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 6 */ {0,   0,   0,   0,   1,   1,   1,   1},
+/* DEPTH 7 */ {0,   0,   1,   1,   2,   2,   2,   3},
+
+/* FNUM BIT 6: 000 0100xxxx */
+/* DEPTH 0 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 1 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 2 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 3 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 4 */ {0,   0,   0,   0,   0,   0,   0,   1},
+/* DEPTH 5 */ {0,   0,   0,   0,   1,   1,   1,   1},
+/* DEPTH 6 */ {0,   0,   1,   1,   2,   2,   2,   3},
+/* DEPTH 7 */ {0,   0,   2,   3,   4,   4,   5,   6},
+
+/* FNUM BIT 7: 000 1000xxxx */
+/* DEPTH 0 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 1 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 2 */ {0,   0,   0,   0,   0,   0,   1,   1},
+/* DEPTH 3 */ {0,   0,   0,   0,   1,   1,   1,   1},
+/* DEPTH 4 */ {0,   0,   0,   1,   1,   1,   1,   2},
+/* DEPTH 5 */ {0,   0,   1,   1,   2,   2,   2,   3},
+/* DEPTH 6 */ {0,   0,   2,   3,   4,   4,   5,   6},
+/* DEPTH 7 */ {0,   0,   4,   6,   8,   8, 0xa, 0xc},
+
+/* FNUM BIT 8: 001 0000xxxx */
+/* DEPTH 0 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 1 */ {0,   0,   0,   0,   1,   1,   1,   1},
+/* DEPTH 2 */ {0,   0,   0,   1,   1,   1,   2,   2},
+/* DEPTH 3 */ {0,   0,   1,   1,   2,   2,   3,   3},
+/* DEPTH 4 */ {0,   0,   1,   2,   2,   2,   3,   4},
+/* DEPTH 5 */ {0,   0,   2,   3,   4,   4,   5,   6},
+/* DEPTH 6 */ {0,   0,   4,   6,   8,   8, 0xa, 0xc},
+/* DEPTH 7 */ {0,   0,   8, 0xc,0x10,0x10,0x14,0x18},
+
+/* FNUM BIT 9: 010 0000xxxx */
+/* DEPTH 0 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 1 */ {0,   0,   0,   0,   2,   2,   2,   2},
+/* DEPTH 2 */ {0,   0,   0,   2,   2,   2,   4,   4},
+/* DEPTH 3 */ {0,   0,   2,   2,   4,   4,   6,   6},
+/* DEPTH 4 */ {0,   0,   2,   4,   4,   4,   6,   8},
+/* DEPTH 5 */ {0,   0,   4,   6,   8,   8, 0xa, 0xc},
+/* DEPTH 6 */ {0,   0,   8, 0xc,0x10,0x10,0x14,0x18},
+/* DEPTH 7 */ {0,   0,0x10,0x18,0x20,0x20,0x28,0x30},
+
+/* FNUM BIT10: 100 0000xxxx */
+/* DEPTH 0 */ {0,   0,   0,   0,   0,   0,   0,   0},
+/* DEPTH 1 */ {0,   0,   0,   0,   4,   4,   4,   4},
+/* DEPTH 2 */ {0,   0,   0,   4,   4,   4,   8,   8},
+/* DEPTH 3 */ {0,   0,   4,   4,   8,   8, 0xc, 0xc},
+/* DEPTH 4 */ {0,   0,   4,   8,   8,   8, 0xc,0x10},
+/* DEPTH 5 */ {0,   0,   8, 0xc,0x10,0x10,0x14,0x18},
+/* DEPTH 6 */ {0,   0,0x10,0x18,0x20,0x20,0x28,0x30},
+/* DEPTH 7 */ {0,   0,0x20,0x30,0x40,0x40,0x50,0x60},
+
+};
+
+/* all 128 LFO PM waveforms.
+   Flat lookup read as lfo_pm_table[ fnum_bits*32*8 + pms + LFO_PM ], where
+   fnum_bits = (block_fnum & 0x7f0) >> 4 (see update_phase_lfo_slot() and
+   update_phase_lfo_channel()).
+   NOTE(review): pms is presumably already scaled to depth*32 by the register
+   write code -- confirm against the code that fills CH->pms. */
+static INT32 lfo_pm_table[128*8*32]; /* 128 combinations of 7 bits meaningful (of F-NUMBER), 8 LFO depths, 32 LFO output levels per one depth */
+
+/* register number to channel number , slot offset.
+   OPN_CHAN(N): bits 0-1 of the register number.
+   OPN_SLOT(N): bits 2-3 of the register number.
+   The argument is parenthesized so that compound expressions such as
+   OPN_CHAN(a|b) are masked as a whole. */
+#define OPN_CHAN(N) ((N)&3)
+#define OPN_SLOT(N) (((N)>>2)&3)
+
+/* slot number: index into FM_CH.SLOT[] for each operator name.
+   Note that SLOT2 and SLOT3 are not in numeric order (SLOT2 is index 2, SLOT3 is index 1). */
+#define SLOT1 0
+#define SLOT2 2
+#define SLOT3 1
+#define SLOT4 3
+
+/* output destination bit mask: bit0 = Right enable , bit1 = Left enable.
+   OUTD_CENTER is both bits set (OUTD_RIGHT | OUTD_LEFT). */
+#define OUTD_RIGHT  1
+#define OUTD_LEFT   2
+#define OUTD_CENTER 3
+
+
+/* save output as raw 16-bit sample (debug aid, disabled unless SAVE_SAMPLE is defined) */
+/* #define SAVE_SAMPLE */
+
+#ifdef SAVE_SAMPLE
+/* SAVE_ALL_CHANNELS writes the current sample to sample[0], low byte first.
+   It reads the variable lt (and rt in the stereo variant) from the scope in
+   which the macro is expanded. The "#if 1" below selects the mono variant. */
+static FILE *sample[1];
+	#if 1	/*save to MONO file */
+		#define SAVE_ALL_CHANNELS \
+		{	signed int pom = lt; \
+			fputc((unsigned short)pom&0xff,sample[0]); \
+			fputc(((unsigned short)pom>>8)&0xff,sample[0]); \
+		}
+	#else	/*save to STEREO file */
+		#define SAVE_ALL_CHANNELS \
+		{	signed int pom = lt; \
+			fputc((unsigned short)pom&0xff,sample[0]); \
+			fputc(((unsigned short)pom>>8)&0xff,sample[0]); \
+			pom = rt; \
+			fputc((unsigned short)pom&0xff,sample[0]); \
+			fputc(((unsigned short)pom>>8)&0xff,sample[0]); \
+		}
+	#endif
+#endif
+
+
+/* struct describing a single operator (SLOT): register-derived parameters,
+   phase generator state and envelope generator (EG) state */
+typedef struct
+{
+	INT32	*DT;		/* detune          :dt_tab[DT] (row of FM_ST.dt_tab, indexed by key code) */
+	UINT8	KSR;		/* key scale rate  :3-KSR (shift applied to the key code) */
+	UINT32	ar;			/* attack rate  : 0, or 32 + 2*AR */
+	UINT32	d1r;		/* decay rate   : 0, or 32 + 2*D1R */
+	UINT32	d2r;		/* sustain rate : 0, or 32 + 2*D2R */
+	UINT32	rr;			/* release rate : 34 + 4*RR */
+	UINT8	ksr;		/* key scale rate  :kcode>>(3-KSR) */
+	UINT32	mul;		/* multiple        :ML*2, or 1 when ML is 0 */
+
+	/* Phase Generator */
+	UINT32	phase;		/* phase counter */
+	INT32	Incr;		/* phase step (SLOT1's Incr == -1 marks the channel for recalculation) */
+
+	/* Envelope Generator */
+	UINT8	state;		/* phase type (EG_ATT / EG_DEC / EG_SUS / EG_REL / EG_OFF) */
+	UINT32	tl;			/* total level: TL << 3 */
+	INT32	volume;		/* envelope counter (attenuation level) */
+	UINT32	sl;			/* sustain level:sl_table[SL] */
+	UINT32	vol_out;	/* current output from EG circuit (without AM from LFO) */
+
+	/* cached EG rate lookups: eg_sh_* come from eg_rate_shift[], eg_sel_* from eg_rate_select2612[] */
+	UINT8	eg_sh_ar;	/*  (attack state) */
+	UINT8	eg_sel_ar;	/*  (attack state) */
+	UINT8	eg_sh_d1r;	/*  (decay state) */
+	UINT8	eg_sel_d1r;	/*  (decay state) */
+	UINT8	eg_sh_d2r;	/*  (sustain state) */
+	UINT8	eg_sel_d2r;	/*  (sustain state) */
+	UINT8	eg_sh_rr;	/*  (release state) */
+	UINT8	eg_sel_rr;	/*  (release state) */
+
+	UINT8	ssg;		/* SSG-EG waveform: 0x08 = SSG-EG enabled, 0x04 = invert, 0x02 = alternate, 0x01 = hold */
+	UINT8	ssgn;		/* SSG-EG negated output flag (0 or 4, compared against ssg&0x04) */
+
+	UINT8	key;		/* 0=last key was KEY OFF, 1=KEY ON */
+
+	/* LFO */
+	UINT32	AMmask;		/* AM enable flag (mask ANDed with the LFO AM value in volume_calc) */
+
+} FM_SLOT;
+
+/* state of one FM channel: four operators plus algorithm routing and frequency data */
+typedef struct
+{
+	FM_SLOT	SLOT[4];	/* four SLOTs (operators), indexed with SLOT1..SLOT4 */
+
+	UINT8	ALGO;		/* algorithm (selects the routing in setup_connection) */
+	UINT8	FB;			/* feedback shift */
+	INT32	op1_out[2];	/* op1 output for feedback (last two samples) */
+
+	/* output routing, filled in by setup_connection() */
+	INT32	*connect1;	/* SLOT1 output pointer (0 is a special mark used by algorithm 5) */
+	INT32	*connect3;	/* SLOT3 output pointer */
+	INT32	*connect2;	/* SLOT2 output pointer */
+	INT32	*connect4;	/* SLOT4 output pointer */
+
+	INT32	*mem_connect;/* where to put the delayed sample (MEM) */
+	INT32	mem_value;	/* delayed sample (MEM) value */
+
+	INT32	pms;		/* channel PMS (added to the lfo_pm_table index) */
+	UINT8	ams;		/* channel AMS (right shift applied to the LFO AM value) */
+
+	UINT32	fc;			/* fnum,blk:adjusted to sample rate */
+	UINT8	kcode;		/* key code:                        */
+	UINT32	block_fnum;	/* current blk/fnum value for this channel (can be different between slots of one channel in 3slot mode) */
+	UINT8	Muted;		/* non-zero: chan_calc() skips this channel */
+} FM_CH;
+
+
+/* general chip state: clocks, status/IRQ, timers and host callbacks */
+typedef struct
+{
+	/* running_device *device; */
+	void *		param;				/* this chip parameter (passed back to the callbacks) */
+	double		freqbase;			/* frequency base       */
+	int			timer_prescaler;	/* timer prescaler      */
+	UINT8		irq;				/* interrupt level      */
+	UINT8		irqmask;			/* irq mask             */
+#if FM_BUSY_FLAG_SUPPORT
+	TIME_TYPE	busy_expiry_time;	/* expiry time of the busy status */
+#endif
+	UINT32		clock;				/* master clock  (Hz)   */
+	UINT32		rate;				/* internal sampling rate (Hz) */
+#if RSM_ENABLE
+	INT32		rateratio;			/* resampling ratio */
+	INT32		framecnt;			/* resampling frames count*/
+	FMSAMPLE	cur_sample[2];		/* current sample */
+	FMSAMPLE	prev_sample[2];		/* previous sample */
+#endif
+	UINT8		address;			/* address register     */
+	UINT8		status;				/* status flag (0x01 = timer A, 0x02 = timer B) */
+	UINT32		mode;				/* mode  CSM / 3SLOT (last value written via set_timers) */
+	UINT8		fn_h;				/* freq latch           */
+	UINT8		prescaler_sel;		/* prescaler selector   */
+	INT32		TA;					/* timer a              */
+	INT32		TAC;				/* timer a counter      */
+	UINT8		TB;					/* timer b              */
+	INT32		TBC;				/* timer b counter      */
+	/* local time tables */
+	INT32		dt_tab[8][32];		/* DeTune table         */
+	/* Extension Timer and IRQ handler */
+	FM_TIMERHANDLER	timer_handler;	/* optional external timer callback (may be 0) */
+	FM_IRQHANDLER	IRQ_Handler;	/* optional IRQ line callback (may be 0) */
+	const ssg_callbacks *SSG;
+} FM_ST;
+
+
+
+/***********************************************************/
+/* OPN unit                                                */
+/***********************************************************/
+
+/* OPN 3slot struct: per-operator frequency data used in 3-slot mode, plus the CSM key flag */
+typedef struct
+{
+	UINT32  fc[3];			/* fnum3,blk3: calculated */
+	UINT8	fn_h;			/* freq3 latch */
+	UINT8	kcode[3];		/* key code */
+	UINT32	block_fnum[3];	/* current fnum value for this slot (can be different between slots of one channel in 3slot mode) */
+	UINT8   key_csm;        /* CSM mode Key-ON flag */
+} FM_3SLOT;
+
+/* OPN/A/B common state: shared timers, LFO, frequency table and per-sample scratch values */
+typedef struct
+{
+	UINT8	type;			/* chip type */
+	FM_ST	ST;				/* general state */
+	FM_3SLOT SL3;			/* 3 slot mode state */
+	FM_CH	*P_CH;			/* pointer of CH */
+	unsigned int pan[6*2];	/* fm channels output masks (0xffffffff = enable) */
+
+	UINT32	eg_cnt;			/* global envelope generator counter */
+	UINT32	eg_timer;		/* global envelope generator counter works at frequency = chipclock/144/3 */
+	UINT32	eg_timer_add;	/* step of eg_timer */
+	UINT32	eg_timer_overflow;/* envelope generator timer overflows every 3 samples (on real chip) */
+
+
+	/* there are 2048 FNUMs that can be generated using FNUM/BLK registers
+		but LFO works with one more bit of a precision so we really need 4096 elements */
+	UINT32  fn_table[4096]; /* fnumber->increment counter */
+	UINT32 fn_max;    /* maximal phase increment (used for phase overflow) */
+
+	/* LFO */
+	UINT8   lfo_cnt;            /* current LFO phase (out of 128) */
+	UINT32  lfo_timer;          /* current LFO phase runs at LFO frequency */
+	UINT32  lfo_timer_add;      /* step of lfo_timer */
+	UINT32  lfo_timer_overflow; /* LFO timer overflows every N samples (depends on LFO frequency); 0 = LFO disabled */
+	UINT32  LFO_AM;             /* current LFO AM step */
+	UINT32  LFO_PM;             /* current LFO PM step */
+
+	INT32	m2,c1,c2;		/* Phase Modulation input for operators 2,3,4 */
+	INT32	mem;			/* one sample delay memory */
+	INT32	out_fm[6];		/* outputs of working channels */
+
+} FM_OPN;
+
+/* here's the virtual YM2612: register file, OPN core state, six channels and DAC state */
+typedef struct
+{
+	UINT8		REGS[512];			/* registers            */
+	FM_OPN		OPN;				/* OPN state            */
+	FM_CH		CH[6];				/* channel state        */
+	UINT8		addr_A1;			/* address line A1      */
+
+	/* dac output (YM2612) */
+	/* int			dacen; */
+	UINT8		dacen;
+	UINT8		dac_test;
+	INT32		dacout;
+	UINT8		MuteDAC;
+
+	/* NOTE(review): the purpose of WaveOutMode/WaveL/WaveR is not visible in this part of the file */
+	UINT8		WaveOutMode;
+	INT32		WaveL;
+	INT32		WaveR;
+} YM2612;
+
+/* log output level.
+   LOG(n,x) calls logerror only when n >= LOG_LEVEL; x must be a complete,
+   parenthesized argument list, e.g. LOG(LOG_ERR,("message %d\n",value)). */
+#define LOG_ERR  3      /* ERROR       */
+#define LOG_WAR  2      /* WARNING     */
+#define LOG_INF  1      /* INFORMATION */
+#define LOG_LEVEL LOG_INF
+
+#ifndef __RAINE__
+#define LOG(n,x) do { if( (n)>=LOG_LEVEL ) logerror x; } while (0)
+#endif
+
+/* limiter: clamp the lvalue val into the range [min, max].
+   Every argument is parenthesized so that compound expressions (e.g. a|b)
+   are compared and assigned as a whole. Arguments may still be evaluated more
+   than once, so do not pass expressions with side effects.
+   The brace-block form is kept so existing call sites keep compiling
+   whether or not they end with a semicolon. */
+#define Limit(val, max,min) { \
+	if ( (val) > (max) )      (val) = (max); \
+	else if ( (val) < (min) ) (val) = (min); \
+}
+
+/* Disabled: defining USE_VGM_INIT_SWITCH enables the IsVGMInit shortcut in
+   FM_KEYOFF / FM_KEYOFF_CSM (operators are silenced immediately). */
+#if 0
+#define USE_VGM_INIT_SWITCH
+static UINT8 IsVGMInit = 0;
+#endif
+/* NOTE(review): PseudoSt is not used in this part of the file; its meaning must be taken from its users */
+static UINT8 PseudoSt = 0x00;
+/* disabled debug file output: */
+/*#include <stdio.h>
+static FILE* hFile;
+static UINT32 FileSample;*/
+
+/* Set the given status bits. If the IRQ line is currently low and at least
+   one set status bit is enabled in irqmask, raise the IRQ and notify the
+   host through IRQ_Handler (when one is installed). */
+INLINE void FM_STATUS_SET(FM_ST *ST,int flag)
+{
+	ST->status |= flag;
+
+	/* IRQ already asserted: nothing changes */
+	if (ST->irq)
+		return;
+
+	/* no unmasked status bit pending: IRQ stays low */
+	if ((ST->status & ST->irqmask) == 0)
+		return;
+
+	/* OFF -> ON transition */
+	ST->irq = 1;
+	if (ST->IRQ_Handler)
+		(ST->IRQ_Handler)(ST->param, 1);
+}
+
+/* Clear the given status bits. If the IRQ line is currently high and no
+   status bit enabled in irqmask remains set, lower the IRQ and notify the
+   host through IRQ_Handler (when one is installed). */
+INLINE void FM_STATUS_RESET(FM_ST *ST,int flag)
+{
+	ST->status &= ~flag;
+
+	/* IRQ not asserted: nothing changes */
+	if (!ST->irq)
+		return;
+
+	/* an unmasked status bit is still pending: IRQ stays high */
+	if ((ST->status & ST->irqmask) != 0)
+		return;
+
+	/* ON -> OFF transition */
+	ST->irq = 0;
+	if (ST->IRQ_Handler)
+		(ST->IRQ_Handler)(ST->param, 0);
+}
+
+/* IRQ mask set: store the new mask, then re-evaluate the IRQ line.
+   Calling SET and RESET with flag 0 leaves the status bits untouched and only
+   runs their IRQ transition checks against the new mask. */
+INLINE void FM_IRQMASK_SET(FM_ST *ST,int flag)
+{
+	ST->irqmask = flag;
+	/* IRQ handling check */
+	FM_STATUS_SET(ST,0);	/* may raise the IRQ (OFF -> ON) */
+	FM_STATUS_RESET(ST,0);	/* may lower the IRQ (ON -> OFF) */
+}
+
+/* Key ON for operator s of channel CH.
+   If the operator was keyed off (and the CSM check below passes), the phase
+   generator is restarted, the SSG-EG inversion flag is cleared and the
+   envelope state and EG output are recomputed.
+   SLOT->key is set to 1 in every case. */
+INLINE void FM_KEYON(FM_OPN *OPN, FM_CH *CH , int s )
+{
+	FM_SLOT *SLOT = &CH->SLOT[s];
+
+	/* Note by Valley Bell:
+	   I assume that the CSM mode shouldn't affect channels
+	   other than FM3, so I added a check for it here.*/
+	/* NOTE(review): the check compares against P_CH[3], while set_timers()
+	   applies the CSM key off to P_CH[2]; as written, an active CSM key blocks
+	   every channel except index 3 -- confirm which channel is intended. */
+	if( !SLOT->key && (!OPN->SL3.key_csm || CH == &OPN->P_CH[3]))
+	{
+		/* restart Phase Generator */
+		SLOT->phase = 0;
+
+		/* reset SSG-EG inversion flag */
+		SLOT->ssgn = 0;
+
+		if ((SLOT->ar + SLOT->ksr) < 94 /*32+62*/)
+		{
+			/* normal attack, unless attenuation is already minimal: then skip to Decay (or Sustain when SL is 0) */
+			SLOT->state = (SLOT->volume <= MIN_ATT_INDEX) ? ((SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC) : EG_ATT;
+		}
+		else
+		{
+			/* force attenuation level to 0 */
+			SLOT->volume = MIN_ATT_INDEX;
+
+			/* directly switch to Decay (or Sustain) */
+			SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC;
+		}
+
+		/* recalculate EG output (inverted when SSG-EG is enabled and the inversion condition holds) */
+		if ((SLOT->ssg&0x08) && (SLOT->ssgn ^ (SLOT->ssg&0x04)))
+			SLOT->vol_out = ((UINT32)(0x200 - SLOT->volume) & MAX_ATT_INDEX) + SLOT->tl;
+		else
+			SLOT->vol_out = (UINT32)SLOT->volume + SLOT->tl;
+	}
+
+	SLOT->key = 1;
+}
+
+/* Key OFF for operator s of channel CH.
+   If the operator was keyed on (and the CSM check passes) and its envelope is
+   in a state above EG_REL, it is switched to Release; with SSG-EG enabled the
+   attenuation is converted first and the operator may go straight to EG_OFF.
+   SLOT->key is cleared in every case. */
+INLINE void FM_KEYOFF(FM_OPN *OPN, FM_CH *CH , int s )
+{
+	FM_SLOT *SLOT = &CH->SLOT[s];
+
+	/* NOTE(review): same P_CH[3] comparison as in FM_KEYON, while set_timers()
+	   uses P_CH[2] for CSM -- confirm which channel is intended. */
+	if (SLOT->key && (!OPN->SL3.key_csm || CH == &OPN->P_CH[3]))
+	{
+#ifdef USE_VGM_INIT_SWITCH
+		if (IsVGMInit)	/* workaround for VGMs trimmed with VGMTool */
+		{
+			SLOT->state = EG_OFF;
+			SLOT->volume = MAX_ATT_INDEX;
+			SLOT->vol_out= MAX_ATT_INDEX;
+		}
+		else
+#endif
+		if (SLOT->state>EG_REL)
+		{
+			SLOT->state = EG_REL; /* phase -> Release */
+
+			/* SSG-EG specific update */
+			if (SLOT->ssg&0x08)
+			{
+				/* convert EG attenuation level */
+				if (SLOT->ssgn ^ (SLOT->ssg&0x04))
+						SLOT->volume = (0x200 - SLOT->volume);
+
+				/* force EG attenuation level */
+				if (SLOT->volume >= 0x200)
+				{
+					SLOT->volume = MAX_ATT_INDEX;
+					SLOT->state  = EG_OFF;
+				}
+
+				/* recalculate EG output */
+				SLOT->vol_out = (UINT32)SLOT->volume + SLOT->tl;
+			}
+		}
+	}
+
+	SLOT->key = 0;
+}
+
+/* CSM-mode variant of key on for operator s of channel CH.
+   It only acts while the operator is keyed off and no CSM key is active;
+   unlike FM_KEYON it never modifies SLOT->key. */
+INLINE void FM_KEYON_CSM(FM_OPN *OPN, FM_CH *CH , int s )
+{
+	FM_SLOT *op = &CH->SLOT[s];
+
+	if (op->key || OPN->SL3.key_csm)
+		return;
+
+	/* restart the phase generator and clear the SSG-EG inversion flag */
+	op->phase = 0;
+	op->ssgn = 0;
+
+	if ((op->ar + op->ksr) >= 94 /*32+62*/)
+	{
+		/* maximal attack rate: attenuation jumps to 0, Attack is skipped */
+		op->volume = MIN_ATT_INDEX;
+		op->state = (op->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC;
+	}
+	else if (op->volume > MIN_ATT_INDEX)
+	{
+		op->state = EG_ATT;
+	}
+	else
+	{
+		/* attenuation already minimal: go to Decay, or Sustain when SL is 0 */
+		op->state = (op->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC;
+	}
+
+	/* recalculate EG output, inverted when SSG-EG inversion applies */
+	if ((op->ssg&0x08) && (op->ssgn ^ (op->ssg&0x04)))
+		op->vol_out = ((UINT32)(0x200 - op->volume) & MAX_ATT_INDEX) + op->tl;
+	else
+		op->vol_out = (UINT32)op->volume + op->tl;
+}
+
+/* CSM-mode variant of key off for operator s of channel CH.
+   It only acts while SLOT->key is 0 (the operator is not held by a regular
+   key on) and never modifies SLOT->key. The release handling matches FM_KEYOFF. */
+INLINE void FM_KEYOFF_CSM(FM_CH *CH , int s )
+{
+	FM_SLOT *SLOT = &CH->SLOT[s];
+	if (!SLOT->key)
+	{
+#ifdef USE_VGM_INIT_SWITCH
+		if (IsVGMInit)
+		{
+			SLOT->state = EG_OFF;
+			SLOT->volume = MAX_ATT_INDEX;
+			SLOT->vol_out= MAX_ATT_INDEX;
+		}
+		else
+#endif
+		if (SLOT->state>EG_REL)
+		{
+			SLOT->state = EG_REL; /* phase -> Release */
+
+			/* SSG-EG specific update */
+			if (SLOT->ssg&0x08)
+			{
+				/* convert EG attenuation level */
+				if (SLOT->ssgn ^ (SLOT->ssg&0x04))
+					SLOT->volume = (0x200 - SLOT->volume);
+
+				/* force EG attenuation level */
+				if (SLOT->volume >= 0x200)
+				{
+					SLOT->volume = MAX_ATT_INDEX;
+					SLOT->state  = EG_OFF;
+				}
+
+				/* recalculate EG output */
+				SLOT->vol_out = (UINT32)SLOT->volume + SLOT->tl;
+			}
+		}
+	}
+}
+
+/* OPN Mode Register Write.
+   OPN : chip state (its ST.mode holds the previous mode value)
+   ST  : general state whose timers/status are updated
+   n   : value handed to timer_handler as its first argument
+   v   : new mode register value (bit layout below) */
+INLINE void set_timers( FM_OPN *OPN, FM_ST *ST, void *n, int v )
+{
+	/* b7 = CSM MODE */
+	/* b6 = 3 slot mode */
+	/* b5 = reset b */
+	/* b4 = reset a */
+	/* b3 = timer enable b */
+	/* b2 = timer enable a */
+	/* b1 = load b */
+	/* b0 = load a */
+
+	/* CSM / 3-slot bits changed? */
+	if ((OPN->ST.mode ^ v) & 0xC0)
+	{
+		/* phase increment need to be recalculated */
+		OPN->P_CH[2].SLOT[SLOT1].Incr=-1;
+
+		/* CSM mode disabled and CSM key ON active*/
+		if (((v & 0xC0) != 0x80) && OPN->SL3.key_csm)
+		{
+			/* CSM Mode Key OFF (verified by Nemesis on real hardware) */
+			FM_KEYOFF_CSM(&OPN->P_CH[2],SLOT1);
+			FM_KEYOFF_CSM(&OPN->P_CH[2],SLOT2);
+			FM_KEYOFF_CSM(&OPN->P_CH[2],SLOT3);
+			FM_KEYOFF_CSM(&OPN->P_CH[2],SLOT4);
+			OPN->SL3.key_csm = 0;
+		}
+	}
+
+	/* reset Timer b flag */
+	if( v & 0x20 )
+		FM_STATUS_RESET(ST,0x02);
+	/* reset Timer a flag */
+	if( v & 0x10 )
+		FM_STATUS_RESET(ST,0x01);
+	/* load b (only on a 0 -> 1 transition of the load bit) */
+	if ((v&2) && !(ST->mode&2))
+	{
+		ST->TBC = ( 256-ST->TB)<<4;
+		/* External timer handler */
+		if (ST->timer_handler) (ST->timer_handler)(n,1,ST->TBC * ST->timer_prescaler,(int)ST->clock);
+	}
+	/* load a (only on a 0 -> 1 transition of the load bit) */
+	if ((v&1) && !(ST->mode&1))
+	{
+		ST->TAC = (1024-ST->TA);
+		/* External timer handler (receives the unscaled count) */
+		if (ST->timer_handler) (ST->timer_handler)(n,0,ST->TAC * ST->timer_prescaler,(int)ST->clock);
+		/* scale by 4096 for the internal counter, matching the step used in INTERNAL_TIMER_A */
+		ST->TAC *= 4096;
+	}
+
+	/* remember the new mode; the transition checks above rely on the old value */
+	ST->mode = (UINT32)v;
+}
+
+
+/* Timer A Overflow: flag the overflow when Timer A is enabled (mode bit 2),
+   then reload the counter from TA and tell the external timer handler, if any. */
+INLINE void TimerAOver(FM_ST *ST)
+{
+	if ((ST->mode & 0x04) != 0)
+		FM_STATUS_SET(ST, 0x01);
+
+	/* reload: the handler sees the unscaled count, the stored counter is scaled by 4096 */
+	ST->TAC = 1024 - ST->TA;
+	if (ST->timer_handler)
+	{
+		(ST->timer_handler)(ST->param, 0, ST->TAC * ST->timer_prescaler, (int)ST->clock);
+	}
+	ST->TAC *= 4096;
+}
+/* Timer B Overflow: flag the overflow when Timer B is enabled (mode bit 3),
+   then reload the counter from TB and tell the external timer handler, if any. */
+INLINE void TimerBOver(FM_ST *ST)
+{
+	if ((ST->mode & 0x08) != 0)
+		FM_STATUS_SET(ST, 0x02);
+
+	/* reload the counter */
+	ST->TBC = (256 - ST->TB) << 4;
+	if (ST->timer_handler)
+	{
+		(ST->timer_handler)(ST->param, 1, ST->TBC * ST->timer_prescaler, (int)ST->clock);
+	}
+}
+
+
+#if FM_INTERNAL_TIMER
+/* ----- internal timer mode , update timer */
+/* Valley Bell: defines fixed */
+/* Both macros only act when the counter is non-zero and no external
+   timer_handler is installed. INTERNAL_TIMER_A additionally uses the
+   identifiers OPN and CSMKeyControll from the scope where it is expanded. */
+
+/* ---------- calculate timer A ---------- */
+	#define INTERNAL_TIMER_A(ST,CSM_CH)					\
+	{													\
+		if( (ST)->TAC &&  ((ST)->timer_handler==0) )		\
+			if( ((ST)->TAC -= (int)((ST)->freqbase*4096)) <= 0 )	\
+			{											\
+				TimerAOver( ST );						\
+				/* CSM mode total level latch and auto key on */	\
+				if( (ST)->mode & 0x80 )					\
+					CSMKeyControll( OPN, CSM_CH );			\
+			}											\
+	}
+/* ---------- calculate timer B ---------- */
+	#define INTERNAL_TIMER_B(ST,step)						\
+	{														\
+		if( (ST)->TBC && ((ST)->timer_handler==0) )				\
+			if( ((ST)->TBC -= (int)((ST)->freqbase*4096*step)) <= 0 )	\
+				TimerBOver( ST );							\
+	}
+#else /* FM_INTERNAL_TIMER */
+/* external timer mode: both macros expand to nothing */
+#define INTERNAL_TIMER_A(ST,CSM_CH)
+#define INTERNAL_TIMER_B(ST,step)
+#endif /* FM_INTERNAL_TIMER */
+
+
+
+/* Busy flag support.
+   With FM_BUSY_FLAG_SUPPORT: FM_STATUS_FLAG returns the status with bit 7 set
+   while the busy period has not expired, FM_BUSY_SET starts a busy period and
+   FM_BUSY_CLEAR ends it. Without it, the status is returned unchanged and
+   the other two macros do nothing.
+   NOTE(review): the enabled variant dereferences ST->device, but the device
+   member of FM_ST is commented out -- this branch would not compile as is. */
+#if FM_BUSY_FLAG_SUPPORT
+#define FM_BUSY_CLEAR(ST) ((ST)->busy_expiry_time = UNDEFINED_TIME)
+INLINE UINT8 FM_STATUS_FLAG(FM_ST *ST)
+{
+	if( COMPARE_TIMES(ST->busy_expiry_time, UNDEFINED_TIME) != 0 )
+	{
+		if (COMPARE_TIMES(ST->busy_expiry_time, FM_GET_TIME_NOW(ST->device->machine)) > 0)
+			return ST->status | 0x80;	/* with busy */
+		/* expire */
+		FM_BUSY_CLEAR(ST);
+	}
+	return ST->status;
+}
+INLINE void FM_BUSY_SET(FM_ST *ST,int busyclock )
+{
+	TIME_TYPE expiry_period = MULTIPLY_TIME_BY_INT(ATTOTIME_IN_HZ(ST->clock), busyclock * ST->timer_prescaler);
+	ST->busy_expiry_time = ADD_TIMES(FM_GET_TIME_NOW(ST->device->machine), expiry_period);
+}
+#else
+#define FM_STATUS_FLAG(ST) ((ST)->status)
+#define FM_BUSY_SET(ST,bclock) {}
+#define FM_BUSY_CLEAR(ST) {}
+#endif
+
+
+/* set algorithm connection.
+   Routes the operator outputs of CH according to CH->ALGO:
+     connect1 (SLOT1), connect2 (SLOT2), connect3 (SLOT3) and mem_connect are
+     pointed at one of the shared modulation inputs in OPN (m2, c1, c2, mem)
+     or at the channel output OPN->out_fm[ch]; connect4 (SLOT4) always goes to
+     the channel output.
+   In the diagrams M1/C1/M2/C2 are the operators written through
+   connect1/connect2/connect3/connect4 respectively.
+   There is no default case: an ALGO value outside 0..7 leaves connect1..3 and
+   mem_connect unchanged. */
+INLINE void setup_connection( FM_OPN *OPN, FM_CH *CH, int ch )
+{
+	INT32 *carrier = &OPN->out_fm[ch];
+
+	/* note the naming: om2 is SLOT3's output (connect3), oc1 is SLOT2's output (connect2) */
+	INT32 **om1 = &CH->connect1;
+	INT32 **om2 = &CH->connect3;
+	INT32 **oc1 = &CH->connect2;
+
+	INT32 **memc = &CH->mem_connect;
+
+	switch( CH->ALGO )
+	{
+	case 0:
+		/* M1---C1---MEM---M2---C2---OUT */
+		*om1 = &OPN->c1;
+		*oc1 = &OPN->mem;
+		*om2 = &OPN->c2;
+		*memc= &OPN->m2;
+		break;
+	case 1:
+		/* M1------+-MEM---M2---C2---OUT */
+		/*      C1-+                     */
+		*om1 = &OPN->mem;
+		*oc1 = &OPN->mem;
+		*om2 = &OPN->c2;
+		*memc= &OPN->m2;
+		break;
+	case 2:
+		/* M1-----------------+-C2---OUT */
+		/*      C1---MEM---M2-+          */
+		*om1 = &OPN->c2;
+		*oc1 = &OPN->mem;
+		*om2 = &OPN->c2;
+		*memc= &OPN->m2;
+		break;
+	case 3:
+		/* M1---C1---MEM------+-C2---OUT */
+		/*                 M2-+          */
+		*om1 = &OPN->c1;
+		*oc1 = &OPN->mem;
+		*om2 = &OPN->c2;
+		*memc= &OPN->c2;
+		break;
+	case 4:
+		/* M1---C1-+-OUT */
+		/* M2---C2-+     */
+		/* MEM: not used */
+		*om1 = &OPN->c1;
+		*oc1 = carrier;
+		*om2 = &OPN->c2;
+		*memc= &OPN->mem;	/* store it anywhere where it will not be used */
+		break;
+	case 5:
+		/*    +----C1----+     */
+		/* M1-+-MEM---M2-+-OUT */
+		/*    +----C2----+     */
+		*om1 = 0;	/* special mark (null pointer, tested as !CH->connect1 in chan_calc) */
+		*oc1 = carrier;
+		*om2 = carrier;
+		*memc= &OPN->m2;
+		break;
+	case 6:
+		/* M1---C1-+     */
+		/*      M2-+-OUT */
+		/*      C2-+     */
+		/* MEM: not used */
+		*om1 = &OPN->c1;
+		*oc1 = carrier;
+		*om2 = carrier;
+		*memc= &OPN->mem;	/* store it anywhere where it will not be used */
+		break;
+	case 7:
+		/* M1-+     */
+		/* C1-+-OUT */
+		/* M2-+     */
+		/* C2-+     */
+		/* MEM: not used*/
+		*om1 = carrier;
+		*oc1 = carrier;
+		*om2 = carrier;
+		*memc= &OPN->mem;	/* store it anywhere where it will not be used */
+		break;
+	}
+
+	/* SLOT4 always feeds the channel output */
+	CH->connect4 = carrier;
+}
+
+/* set detune & multiple from register value v:
+   bits 0-3 select the multiple (0 means 1, otherwise twice the value),
+   bits 4-6 select the row of ST->dt_tab used for detune.
+   The channel is flagged for a phase increment refresh. */
+INLINE void set_det_mul(FM_ST *ST,FM_CH *CH,FM_SLOT *SLOT,int v)
+{
+	const int multiple = v & 0x0f;
+	const int detune   = (v >> 4) & 7;
+
+	if (multiple)
+		SLOT->mul = multiple * 2;
+	else
+		SLOT->mul = 1;
+
+	SLOT->DT = ST->dt_tab[detune];
+
+	/* Incr == -1 on SLOT1 makes refresh_fc_eg_chan() recalculate the channel */
+	CH->SLOT[SLOT1].Incr = -1;
+}
+
+/* set total level from the low 7 bits of v and refresh the EG output.
+   CH is unused. */
+INLINE void set_tl(FM_CH *CH,FM_SLOT *SLOT , int v)
+{
+	UINT32 attenuation;
+
+	(void)CH;
+
+	SLOT->tl = (v&0x7f)<<(ENV_BITS-7); /* 7bit TL */
+
+	/* EG output: inverted only for an SSG-EG operator whose inversion
+	   condition holds and whose state is above EG_REL */
+	attenuation = (UINT32)SLOT->volume;
+	if ((SLOT->ssg&0x08) && (SLOT->state > EG_REL) && (SLOT->ssgn ^ (SLOT->ssg&0x04)))
+		attenuation = (UINT32)(0x200 - SLOT->volume) & MAX_ATT_INDEX;
+
+	SLOT->vol_out = attenuation + SLOT->tl;
+}
+
+/* set attack rate & key scale from register value v:
+   bits 0-4 give the attack rate (0 stays 0, otherwise 32 + 2*AR),
+   bits 6 and up give the key scale, stored as 3 - (v>>6).
+   type is unused. */
+INLINE void set_ar_ksr(UINT8 type, FM_CH *CH,FM_SLOT *SLOT,int v)
+{
+	const int ar_bits = v & 0x1f;
+	const UINT8 new_KSR = (UINT8)(3-(v>>6));
+	UINT32 rate;
+
+	(void)type;
+
+	SLOT->ar = ar_bits ? 32 + (ar_bits<<1) : 0;
+
+	/* a changed key scale requires a phase increment / EG rate refresh */
+	if (new_KSR != SLOT->KSR)
+		CH->SLOT[SLOT1].Incr = -1;
+	SLOT->KSR = new_KSR;
+
+	/* The attack rate lookups are refreshed here unconditionally: when KSR and */
+	/* the key code change together SLOT->ksr may end up unchanged, and the     */
+	/* refresh in refresh_fc_eg_slot() would then be skipped although SLOT->ar  */
+	/* has changed. This fixes the introduction music of Batman & Robin (Eke-Eke) */
+	rate = SLOT->ar + SLOT->ksr;
+	if (rate >= 94 /*32+62*/)
+	{
+		SLOT->eg_sh_ar  = 0;
+		SLOT->eg_sel_ar = 18*RATE_STEPS;	/* verified by Nemesis on real hardware */
+	}
+	else
+	{
+		SLOT->eg_sh_ar  = eg_rate_shift[rate];
+		SLOT->eg_sel_ar = eg_rate_select2612[rate];
+	}
+}
+
+/* set decay rate from bits 0-4 of v (0 stays 0, otherwise 32 + 2*DR) and
+   refresh the cached decay rate lookups. type is unused. */
+INLINE void set_dr(UINT8 type, FM_SLOT *SLOT,int v)
+{
+	const int dr_bits = v & 0x1f;
+	UINT32 rate;
+
+	(void)type;
+
+	if (dr_bits)
+		SLOT->d1r = 32 + (dr_bits<<1);
+	else
+		SLOT->d1r = 0;
+
+	rate = SLOT->d1r + SLOT->ksr;
+	SLOT->eg_sh_d1r  = eg_rate_shift[rate];
+	SLOT->eg_sel_d1r = eg_rate_select2612[rate];
+}
+
+/* set sustain rate from bits 0-4 of v (0 stays 0, otherwise 32 + 2*SR) and
+   refresh the cached sustain rate lookups. type is unused. */
+INLINE void set_sr(UINT8 type, FM_SLOT *SLOT,int v)
+{
+	const int sr_bits = v & 0x1f;
+	UINT32 rate;
+
+	(void)type;
+
+	if (sr_bits)
+		SLOT->d2r = 32 + (sr_bits<<1);
+	else
+		SLOT->d2r = 0;
+
+	rate = SLOT->d2r + SLOT->ksr;
+	SLOT->eg_sh_d2r  = eg_rate_shift[rate];
+	SLOT->eg_sel_d2r = eg_rate_select2612[rate];
+}
+
+/* set sustain level & release rate from register value v:
+   v>>4 selects the sustain level from sl_table, bits 0-3 give the release
+   rate (34 + 4*RR). type is unused. */
+INLINE void set_sl_rr(UINT8 type, FM_SLOT *SLOT,int v)
+{
+	UINT32 rate;
+
+	(void)type;
+
+	SLOT->sl = sl_table[ v>>4 ];
+
+	/* a decaying envelope that is already at or past the new level moves on to Sustain */
+	if (SLOT->state == EG_DEC)
+	{
+		if (SLOT->volume >= (INT32)(SLOT->sl))
+			SLOT->state = EG_SUS;
+	}
+
+	SLOT->rr = 34 + ((v&0x0f)<<2);
+
+	rate = SLOT->rr + SLOT->ksr;
+	SLOT->eg_sh_rr  = eg_rate_shift[rate];
+	SLOT->eg_sel_rr = eg_rate_select2612[rate];
+}
+
+/* advance LFO to next sample.
+   Does nothing while lfo_timer_overflow is 0 (LFO disabled). Otherwise the
+   timer is advanced and, for every overflow, the 128-step LFO counter moves
+   on and the AM / PM steps are derived from it. */
+INLINE void advance_lfo(FM_OPN *OPN)
+{
+	UINT8 step;
+
+	if (OPN->lfo_timer_overflow == 0)
+		return;
+
+	OPN->lfo_timer += OPN->lfo_timer_add;
+
+	/* when LFO is enabled, one level will last for 108, 77, 71, 67, 62, 44, 8 or 5 samples */
+	while (OPN->lfo_timer >= OPN->lfo_timer_overflow)
+	{
+		OPN->lfo_timer -= OPN->lfo_timer_overflow;
+
+		/* 128 LFO steps, wrapping around */
+		step = (UINT8)((OPN->lfo_cnt + 1) & 127);
+		OPN->lfo_cnt = step;
+
+		/* inverted triangle (Valley Bell: taken from Genesis Plus GX 1.71):
+		   AM goes from 126 down to 0 in steps of 2, then from 0 up to 126 */
+		OPN->LFO_AM = (UINT32)((step < 64) ? (step ^ 63) : (step & 63)) << 1;
+
+		/* PM works with 4 times slower clock */
+		OPN->LFO_PM = step >> 2;
+	}
+}
+
+/* Advance the envelope generators of one channel by one EG step.
+   SLOT must point at the first of four consecutive operators (the loop
+   increments the pointer four times). For each operator the current state
+   (attack / decay / sustain / release) is only updated when the low
+   eg_sh_* bits of OPN->eg_cnt are all zero; the increment is taken from
+   eg_inc[] and vol_out is recalculated after a change. */
+INLINE void advance_eg_channel(FM_OPN *OPN, FM_SLOT *SLOT)
+{
+	/* unsigned int out; */
+	unsigned int i = 4; /* four operators per channel */
+
+	do
+	{
+		switch(SLOT->state)
+		{
+			case EG_ATT:    /* attack phase */
+			if (!(OPN->eg_cnt & ((1<<SLOT->eg_sh_ar)-1)))
+			{
+				/* update attenuation level (~volume is negative for volume >= 0, so attenuation shrinks) */
+				SLOT->volume += (~SLOT->volume * (eg_inc[SLOT->eg_sel_ar + ((OPN->eg_cnt>>SLOT->eg_sh_ar)&7)]))>>4;
+
+				/* check phase transition*/
+				if (SLOT->volume <= MIN_ATT_INDEX)
+				{
+					SLOT->volume = MIN_ATT_INDEX;
+					SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC; /* special case where SL=0 */
+				}
+
+				/* recalculate EG output */
+				if ((SLOT->ssg&0x08) && (SLOT->ssgn ^ (SLOT->ssg&0x04)))  /* SSG-EG Output Inversion */
+				SLOT->vol_out = ((UINT32)(0x200 - SLOT->volume) & MAX_ATT_INDEX) + SLOT->tl;
+				else
+					SLOT->vol_out = (UINT32)SLOT->volume + SLOT->tl;
+			}
+			break;
+
+			case EG_DEC:  /* decay phase */
+			if (!(OPN->eg_cnt & ((1<<SLOT->eg_sh_d1r)-1)))
+			{
+				/* SSG EG type */
+				if (SLOT->ssg&0x08)
+				{
+					/* update attenuation level (4x increment, stops once 0x200 is reached) */
+					if (SLOT->volume < 0x200)
+				{
+					SLOT->volume += 4 * eg_inc[SLOT->eg_sel_d1r + ((OPN->eg_cnt>>SLOT->eg_sh_d1r)&7)];
+
+					/* recalculate EG output */
+					if (SLOT->ssgn ^ (SLOT->ssg&0x04))   /* SSG-EG Output Inversion */
+						SLOT->vol_out = ((UINT32)(0x200 - SLOT->volume) & MAX_ATT_INDEX) + SLOT->tl;
+					else
+						SLOT->vol_out = (UINT32)SLOT->volume + SLOT->tl;
+				}
+
+				}
+				else
+				{
+				/* update attenuation level */
+				SLOT->volume += eg_inc[SLOT->eg_sel_d1r + ((OPN->eg_cnt>>SLOT->eg_sh_d1r)&7)];
+
+				/* recalculate EG output */
+				SLOT->vol_out = (UINT32)SLOT->volume + SLOT->tl;
+				}
+
+				/* check phase transition*/
+				if (SLOT->volume >= (INT32)(SLOT->sl))
+					SLOT->state = EG_SUS;
+			}
+			break;
+
+			case EG_SUS:  /* sustain phase */
+			if (!(OPN->eg_cnt & ((1<<SLOT->eg_sh_d2r)-1)))
+			{
+				/* SSG EG type */
+				if (SLOT->ssg&0x08)
+				{
+				/* update attenuation level (4x increment, stops once 0x200 is reached) */
+				if (SLOT->volume < 0x200)
+				{
+					SLOT->volume += 4 * eg_inc[SLOT->eg_sel_d2r + ((OPN->eg_cnt>>SLOT->eg_sh_d2r)&7)];
+
+					/* recalculate EG output */
+					if (SLOT->ssgn ^ (SLOT->ssg&0x04))   /* SSG-EG Output Inversion */
+						SLOT->vol_out = ((UINT32)(0x200 - SLOT->volume) & MAX_ATT_INDEX) + SLOT->tl;
+					else
+						SLOT->vol_out = (UINT32)SLOT->volume + SLOT->tl;
+				}
+				}
+				else
+				{
+					/* update attenuation level */
+					SLOT->volume += eg_inc[SLOT->eg_sel_d2r + ((OPN->eg_cnt>>SLOT->eg_sh_d2r)&7)];
+
+					/* check phase transition*/
+					if ( SLOT->volume >= MAX_ATT_INDEX )
+						SLOT->volume = MAX_ATT_INDEX;
+					/* do not change SLOT->state (verified on real chip) */
+
+					/* recalculate EG output */
+					SLOT->vol_out = (UINT32)SLOT->volume + SLOT->tl;
+				}
+			}
+			break;
+
+			case EG_REL:  /* release phase */
+			if (!(OPN->eg_cnt & ((1<<SLOT->eg_sh_rr)-1)))
+			{
+				/* SSG EG type */
+				if (SLOT->ssg&0x08)
+				{
+					/* update attenuation level */
+					if (SLOT->volume < 0x200)
+						SLOT->volume += 4 * eg_inc[SLOT->eg_sel_rr + ((OPN->eg_cnt>>SLOT->eg_sh_rr)&7)];
+				/* check phase transition */
+				if (SLOT->volume >= 0x200)
+				{
+					SLOT->volume = MAX_ATT_INDEX;
+					SLOT->state = EG_OFF;
+				}
+				}
+				else
+				{
+					/* update attenuation level */
+					SLOT->volume += eg_inc[SLOT->eg_sel_rr + ((OPN->eg_cnt>>SLOT->eg_sh_rr)&7)];
+
+					/* check phase transition*/
+					if (SLOT->volume >= MAX_ATT_INDEX)
+					{
+						SLOT->volume = MAX_ATT_INDEX;
+						SLOT->state = EG_OFF;
+					}
+				}
+
+				/* recalculate EG output (never inverted during release) */
+				SLOT->vol_out = (UINT32)SLOT->volume + SLOT->tl;
+
+			}
+			break;
+		}
+
+		/* Valley Bell: These few lines are missing in Genesis Plus GX' ym2612 core file.
+			Disabling them fixes the SSG-EG.
+			Additional Note: Asterix and the Great Rescue: Level 1 sounds "better" with these lines,
+			but less accurate. */
+		#if 0
+		out = ((UINT32)SLOT->volume);
+
+		/* negate output (changes come from alternate bit, init comes from attack bit) */
+		if ((SLOT->ssg&0x08) && (SLOT->ssgn&2) && (SLOT->state > EG_REL))
+			out ^= MAX_ATT_INDEX;
+
+		/* we need to store the result here because we are going to change ssgn
+			in next instruction */
+		SLOT->vol_out = out + SLOT->tl;
+		#endif
+
+		/* next operator of the channel */
+		SLOT++;
+		i--;
+	} while (i);
+
+}
+
+/* SSG-EG update process */
+/* The behavior is based upon Nemesis tests on real hardware */
+/* This is actually executed before each samples */
+/* SLOT must point at the first of four consecutive operators. An operator is
+   only processed when SSG-EG is enabled (ssg & 0x08), its attenuation has
+   reached 0x200 and its state is above EG_REL. */
+INLINE void update_ssg_eg_channel(FM_SLOT *SLOT)
+{
+	unsigned int i = 4; /* four operators per channel */
+
+	do
+	{
+		/* detect SSG-EG transition */
+		/* this is not required during release phase as the attenuation has been forced to MAX and output invert flag is not used */
+		/* if an Attack Phase is programmed, inversion can occur on each sample */
+		if ((SLOT->ssg & 0x08) && (SLOT->volume >= 0x200) && (SLOT->state > EG_REL))
+		{
+			if (SLOT->ssg & 0x01)  /* bit 0 = hold SSG-EG */
+			{
+				/* set inversion flag (only when the alternate bit 0x02 is set) */
+				if (SLOT->ssg & 0x02)
+					SLOT->ssgn = 4;
+
+				/* force attenuation level during decay phases */
+				if ((SLOT->state != EG_ATT) && !(SLOT->ssgn ^ (SLOT->ssg & 0x04)))
+					SLOT->volume  = MAX_ATT_INDEX;
+			}
+			else  /* loop SSG-EG */
+			{
+				/* toggle output inversion flag or reset Phase Generator */
+					if (SLOT->ssg & 0x02)
+						SLOT->ssgn ^= 4;
+					else
+						SLOT->phase = 0;
+
+				/* same as Key ON */
+				if (SLOT->state != EG_ATT)
+				{
+					if ((SLOT->ar + SLOT->ksr) < 94 /*32+62*/)
+					{
+						SLOT->state = (SLOT->volume <= MIN_ATT_INDEX) ? ((SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC) : EG_ATT;
+					}
+					else
+					{
+						/* Attack Rate is maximal: directly switch to Decay or Sustain */
+						SLOT->volume = MIN_ATT_INDEX;
+						SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC;
+					}
+				}
+			}
+
+			/* recalculate EG output */
+			if (SLOT->ssgn ^ (SLOT->ssg&0x04))
+				SLOT->vol_out = ((UINT32)(0x200 - SLOT->volume) & MAX_ATT_INDEX) + SLOT->tl;
+			else
+				SLOT->vol_out = (UINT32)SLOT->volume + SLOT->tl;
+		}
+
+		/* next slot */
+		SLOT++;
+		i--;
+	} while (i);
+}
+
+
+/* Advance the phase of one operator, applying LFO phase modulation.
+   pms and block_fnum are passed in explicitly. The modulation offset is
+   looked up in lfo_pm_table; when it is zero the cached increment
+   SLOT->Incr is used, otherwise the increment is recomputed from the
+   modulated block/fnum value. */
+INLINE void update_phase_lfo_slot(FM_OPN *OPN, FM_SLOT *SLOT, INT32 pms, UINT32 block_fnum)
+{
+	const UINT32 wave_base = ((block_fnum & 0x7f0) >> 4) * 32 * 8;
+	const INT32  pm_offset = lfo_pm_table[ wave_base + pms + OPN->LFO_PM ];
+	UINT32 modulated;
+	UINT32 fn;
+	UINT8  blk;
+	int kc;
+	int fc;
+
+	/* no modulation at this LFO step: use the precomputed increment */
+	if (!pm_offset)
+	{
+		SLOT->phase += SLOT->Incr;
+		return;
+	}
+
+	/* one extra bit of fnum precision, then apply the offset */
+	modulated = block_fnum*2 + pm_offset;
+	blk = (modulated&0x7000) >> 12;
+	fn  = modulated & 0xfff;
+
+	/* keyscale code: fn is 12 bits wide here, so its top 4 bits index the
+	   16-entry opn_fktable (an earlier fn >> 7 read past the table end) */
+	kc = (blk<<2) | opn_fktable[fn >> 8];
+
+	/* (frequency) phase increment counter */
+	fc = (OPN->fn_table[fn]>>(7-blk)) + SLOT->DT[kc];
+
+	/* (frequency) phase overflow (credits to Nemesis) */
+	if (fc < 0)
+		fc += OPN->fn_max;
+
+	SLOT->phase += (fc * SLOT->mul) >> 1;
+}
+
+/* Advance the phase of all four operators of CH, applying LFO phase
+   modulation with the channel's own pms and block_fnum. When the modulation
+   offset is zero each operator uses its cached increment; otherwise the
+   base increment is recomputed once and detune/multiple are applied per
+   operator. */
+INLINE void update_phase_lfo_channel(FM_OPN *OPN, FM_CH *CH)
+{
+	const UINT32 wave_base = ((CH->block_fnum & 0x7f0) >> 4) * 32 * 8;
+	const INT32  pm_offset = lfo_pm_table[ wave_base + CH->pms + OPN->LFO_PM ];
+	UINT32 modulated;
+	UINT32 fn;
+	UINT8  blk;
+	int kc;
+	int fc;
+	int i;
+
+	/* no modulation at this LFO step: use the precomputed increments */
+	if (!pm_offset)
+	{
+		for (i = 0; i < 4; i++)
+			CH->SLOT[i].phase += CH->SLOT[i].Incr;
+		return;
+	}
+
+	/* one extra bit of fnum precision, then apply the offset */
+	modulated = CH->block_fnum*2 + pm_offset;
+	blk = (modulated&0x7000) >> 12;
+	fn  = modulated & 0xfff;
+
+	/* keyscale code: top 4 bits of the 12-bit fn index opn_fktable */
+	kc = (blk<<2) | opn_fktable[fn >> 8];
+
+	/* base (frequency) phase increment shared by the four operators */
+	fc = (OPN->fn_table[fn]>>(7-blk));
+
+	for (i = 0; i < 4; i++)
+	{
+		FM_SLOT *op = &CH->SLOT[i];
+		int finc = fc + op->DT[kc];
+
+		/* (frequency) phase overflow (credits to Nemesis) */
+		if (finc < 0)
+			finc += OPN->fn_max;
+
+		op->phase += (finc*op->mul) >> 1;
+	}
+}
+
+/* update phase increment and envelope generator rates of one operator.
+   fc : base phase increment, detune is added here
+   kc : key code, used for the detune lookup and the key scale value
+   The EG rate lookups are only refreshed when the key scale value changes. */
+INLINE void refresh_fc_eg_slot(FM_OPN *OPN, FM_SLOT *SLOT , int fc , int kc )
+{
+	const int new_ksr = kc >> SLOT->KSR;
+	UINT32 rate;
+
+	fc += SLOT->DT[kc];
+
+	/* detects frequency overflow (credits to Nemesis) */
+	if (fc < 0)
+		fc += OPN->fn_max;
+
+	/* (frequency) phase increment counter */
+	SLOT->Incr = (fc * SLOT->mul) >> 1;
+
+	/* key scale unchanged: cached EG rates are still valid */
+	if (SLOT->ksr == new_ksr)
+		return;
+
+	SLOT->ksr = new_ksr;
+
+	/* attack */
+	rate = SLOT->ar + SLOT->ksr;
+	if (rate >= 32+62)
+	{
+		SLOT->eg_sh_ar  = 0;
+		SLOT->eg_sel_ar = 18*RATE_STEPS; /* verified by Nemesis on real hardware (Attack phase is blocked) */
+	}
+	else
+	{
+		SLOT->eg_sh_ar  = eg_rate_shift[rate];
+		SLOT->eg_sel_ar = eg_rate_select2612[rate];
+	}
+
+	/* decay */
+	rate = SLOT->d1r + SLOT->ksr;
+	SLOT->eg_sh_d1r  = eg_rate_shift[rate];
+	SLOT->eg_sel_d1r = eg_rate_select2612[rate];
+
+	/* sustain */
+	rate = SLOT->d2r + SLOT->ksr;
+	SLOT->eg_sh_d2r  = eg_rate_shift[rate];
+	SLOT->eg_sel_d2r = eg_rate_select2612[rate];
+
+	/* release */
+	rate = SLOT->rr + SLOT->ksr;
+	SLOT->eg_sh_rr  = eg_rate_shift[rate];
+	SLOT->eg_sel_rr = eg_rate_select2612[rate];
+}
+
+/* refresh all four operators of CH when SLOT1's Incr holds the -1 "stale" marker */
+INLINE void refresh_fc_eg_chan(FM_OPN *OPN, FM_CH *CH )
+{
+	FM_SLOT *ops = CH->SLOT;
+
+	/* any value other than -1 means the increments are still valid */
+	if( ops[SLOT1].Incr != -1 )
+		return;
+	refresh_fc_eg_slot(OPN, &ops[SLOT1] , CH->fc , CH->kcode );
+	refresh_fc_eg_slot(OPN, &ops[SLOT2] , CH->fc , CH->kcode );
+	refresh_fc_eg_slot(OPN, &ops[SLOT3] , CH->fc , CH->kcode );
+	refresh_fc_eg_slot(OPN, &ops[SLOT4] , CH->fc , CH->kcode );
+}
+/* attenuation of operator OP: its vol_out plus AM masked by its AMmask; a variable named AM must be in scope where this expands */
+#define volume_calc(OP) ((OP)->vol_out + (AM & (OP)->AMmask))
+/* One operator sample: index sin_tab by (phase + pm*2^15), add env<<3, map through tl_tab; yields 0 past the end of tl_tab. */
+INLINE signed int op_calc(UINT32 phase, unsigned int env, signed int pm)
+{
+  /* shift pm as unsigned: left-shifting a negative int is undefined in C; the wrapped 32-bit sum is unchanged */
+  UINT32 angle = (phase & ~FREQ_MASK) + ((UINT32)pm << 15);
+  UINT32 p = (env<<3) + sin_tab[ ( ((signed int)angle) >> FREQ_SH ) & SIN_MASK ];
+
+  if (p >= TL_TAB_LEN)
+    return 0;
+  return tl_tab[p];
+}
+/* Same lookup as op_calc, except that pm is added to the phase unshifted. */
+INLINE signed int op_calc1(UINT32 phase, unsigned int env, signed int pm)
+{
+  UINT32 angle = (phase & ~FREQ_MASK) + pm;
+  UINT32 idx = (env<<3) + sin_tab[ ( ((signed int)angle) >> FREQ_SH ) & SIN_MASK ];
+
+  return (idx < TL_TAB_LEN) ? tl_tab[idx] : 0;
+}
+/* Compute one sample of channel CH: evaluate its four operators, accumulate their outputs through the connect pointers, then advance the phases. */
+INLINE void chan_calc(YM2612 *F2612, FM_OPN *OPN, FM_CH *CH)
+{
+  UINT32 AM = OPN->LFO_AM >> CH->ams;  /* read by the volume_calc() macro below */
+  unsigned int eg_out;
+
+  if (CH->Muted)
+    return;  /* a muted channel adds no output and its phases are not advanced */
+  /* clear the shared operator-routing accumulators before this channel uses them */
+  OPN->m2 = OPN->c1 = OPN->c2 = OPN->mem = 0;
+
+  *CH->mem_connect = CH->mem_value;  /* restore delayed sample (MEM) value to m2 or c2 */
+
+  eg_out = volume_calc(&CH->SLOT[SLOT1]);
+  {
+    INT32 out = CH->op1_out[0] + CH->op1_out[1];  /* sum of the two stored SLOT1 outputs, used as feedback input below */
+    CH->op1_out[0] = CH->op1_out[1];
+
+    if( !CH->connect1 )
+    {
+      /* algorithm 5  */
+      OPN->mem = OPN->c1 = OPN->c2 = CH->op1_out[0];
+    }
+    else
+    {
+      /* other algorithms */
+      *CH->connect1 += CH->op1_out[0];
+    }
+
+    /* new SLOT1 output: stays 0 when the operator is at or above ENV_QUIET */
+    CH->op1_out[1] = 0;
+    if( eg_out < ENV_QUIET )  /* SLOT 1 */
+    {
+      if (!CH->FB)
+        out=0;
+      /* FB == 0 means no feedback; otherwise the summed history is shifted left by FB */
+      CH->op1_out[1] = op_calc1(CH->SLOT[SLOT1].phase, eg_out, (out<<CH->FB) );
+    }
+  }
+
+  eg_out = volume_calc(&CH->SLOT[SLOT3]);
+  if( eg_out < ENV_QUIET )    /* SLOT 3 */
+    *CH->connect3 += op_calc(CH->SLOT[SLOT3].phase, eg_out, OPN->m2);
+
+  eg_out = volume_calc(&CH->SLOT[SLOT2]);
+  if( eg_out < ENV_QUIET )    /* SLOT 2 */
+    *CH->connect2 += op_calc(CH->SLOT[SLOT2].phase, eg_out, OPN->c1);
+
+  eg_out = volume_calc(&CH->SLOT[SLOT4]);
+  if( eg_out < ENV_QUIET )    /* SLOT 4 */
+    *CH->connect4 += op_calc(CH->SLOT[SLOT4].phase, eg_out, OPN->c2);
+
+
+  /* store current MEM */
+  CH->mem_value = OPN->mem;
+
+  /* update phase counters AFTER output calculations */
+  if(CH->pms)
+  {
+    /* add support for 3 slot mode */
+    if ((OPN->ST.mode & 0xC0) && (CH == &F2612->CH[2]))
+    {
+      update_phase_lfo_slot(OPN, &CH->SLOT[SLOT1], CH->pms, OPN->SL3.block_fnum[1]);
+      update_phase_lfo_slot(OPN, &CH->SLOT[SLOT2], CH->pms, OPN->SL3.block_fnum[2]);
+      update_phase_lfo_slot(OPN, &CH->SLOT[SLOT3], CH->pms, OPN->SL3.block_fnum[0]);
+      update_phase_lfo_slot(OPN, &CH->SLOT[SLOT4], CH->pms, CH->block_fnum);
+    }
+    else update_phase_lfo_channel(OPN, CH);
+  }
+  else  /* no LFO phase modulation */
+  {
+    CH->SLOT[SLOT1].phase += CH->SLOT[SLOT1].Incr;
+    CH->SLOT[SLOT2].phase += CH->SLOT[SLOT2].Incr;
+    CH->SLOT[SLOT3].phase += CH->SLOT[SLOT3].Incr;
+    CH->SLOT[SLOT4].phase += CH->SLOT[SLOT4].Incr;
+  }
+}
+/* Table teardown: a no-op unless SAVE_SAMPLE is defined, in which case the sample dump file is closed. */
+static void FMCloseTable( void )
+{
+#ifdef SAVE_SAMPLE
+	/* sample[0] is the dump file opened in init_tables() */
+	fclose(sample[0]);
+#endif
+}
+
+/* CSM key control: key on all four operators of CH through FM_KEYON_CSM, */
+/* then set SL3.key_csm to 1 to record that a CSM key-on happened */
+INLINE void CSMKeyControll(FM_OPN *OPN, FM_CH *CH)
+{
+	/* all key ON (verified by Nemesis on real hardware) */
+	FM_KEYON_CSM(OPN,CH,SLOT1);
+	FM_KEYON_CSM(OPN,CH,SLOT2);
+	FM_KEYON_CSM(OPN,CH,SLOT3);
+	FM_KEYON_CSM(OPN,CH,SLOT4);
+	OPN->SL3.key_csm = 1;
+}
+
+#ifdef __STATE_H__
+/* FM channel save, internal state only: registers op1_out and fc per channel, phase/state/volume per slot */
+static void FMsave_state_channel(running_device *device,FM_CH *CH,int num_ch)
+{
+	int slot , ch;
+	/* NOTE(review): the registration macros may derive item names from their argument text - confirm before renaming anything */
+	for(ch=0;ch<num_ch;ch++,CH++)
+	{
+		/* channel: registered under index ch */
+		state_save_register_device_item_array(device, ch, CH->op1_out);
+		state_save_register_device_item(device, ch, CH->fc);
+		/* slots: registered under index ch*4+slot */
+		for(slot=0;slot<4;slot++)
+		{
+			FM_SLOT *SLOT = &CH->SLOT[slot];
+			state_save_register_device_item(device, ch * 4 + slot, SLOT->phase);
+			state_save_register_device_item(device, ch * 4 + slot, SLOT->state);
+			state_save_register_device_item(device, ch * 4 + slot, SLOT->volume);
+		}
+	}
+}
+/* FM status save: registers the FM_ST fields listed below, all under index 0 */
+static void FMsave_state_st(running_device *device,FM_ST *ST)
+{
+#if FM_BUSY_FLAG_SUPPORT
+	state_save_register_device_item(device, 0, ST->busy_expiry_time.seconds );
+	state_save_register_device_item(device, 0, ST->busy_expiry_time.attoseconds );
+#endif
+	state_save_register_device_item(device, 0, ST->address );
+	state_save_register_device_item(device, 0, ST->irq     );
+	state_save_register_device_item(device, 0, ST->irqmask );
+	state_save_register_device_item(device, 0, ST->status  );
+	state_save_register_device_item(device, 0, ST->mode    );
+	state_save_register_device_item(device, 0, ST->prescaler_sel );
+	state_save_register_device_item(device, 0, ST->fn_h );
+	state_save_register_device_item(device, 0, ST->TA   );	/* timer A value */
+	state_save_register_device_item(device, 0, ST->TAC  );	/* timer A counter */
+	state_save_register_device_item(device, 0, ST->TB  );	/* timer B value */
+	state_save_register_device_item(device, 0, ST->TBC  );	/* timer B counter */
+}
+#endif /* __STATE_H__ */
+
+#if BUILD_OPN
+/* write an OPN mode register (0x20-0x2f): r = register address, v = value; addresses without a case are ignored */
+static void OPNWriteMode(FM_OPN *OPN, int r, int v)
+{
+	UINT8 c;
+	FM_CH *CH;
+
+	switch(r)
+	{
+	case 0x21:	/* Test */
+		break;
+	case 0x22:	/* LFO FREQ (YM2608/YM2610/YM2610B/YM2612) */
+		if (v&8) /* LFO enabled ? */
+		{
+			#if 0
+			if (!OPN->lfo_timer_overflow)
+			{
+				/* restart LFO */
+				OPN->lfo_cnt   = 0;
+				OPN->lfo_timer = 0;
+				OPN->LFO_AM    = 0;
+				OPN->LFO_PM    = 0;
+			}
+			#endif
+			/* bits 0-2 select the LFO step length from lfo_samples_per_step */
+			OPN->lfo_timer_overflow = lfo_samples_per_step[v&7] << LFO_SH;
+		}
+		else
+		{
+			/* Valley Bell: Ported from Genesis Plus GX 1.71
+				hold LFO waveform in reset state */
+			OPN->lfo_timer_overflow = 0;
+			OPN->lfo_timer = 0;
+			OPN->lfo_cnt = 0;
+
+			/* same LFO_PM / LFO_AM values that ym2612_reset_chip() sets */
+			OPN->LFO_PM = 0;
+			OPN->LFO_AM = 126;
+			/* OPN->lfo_timer_overflow = 0; */
+		}
+		break;
+	case 0x24:	/* timer A High 8*/
+		OPN->ST.TA = (OPN->ST.TA & 0x03)|(((int)v)<<2);
+		break;
+	case 0x25:	/* timer A Low 2*/
+		OPN->ST.TA = (OPN->ST.TA & 0x3fc)|(v&3);
+		break;
+	case 0x26:	/* timer B */
+		OPN->ST.TB = (UINT8)v;
+		break;
+	case 0x27:	/* mode, timer control */
+		set_timers( OPN, &(OPN->ST),OPN->ST.param,v );
+		break;
+	case 0x28:	/* key on / off */
+		c = v & 0x03;	/* bits 0-1: channel within the bank; 3 is not a channel */
+		if( c == 3 ) break;
+		if( (v&0x04) && (OPN->type & TYPE_6CH) ) c+=3;	/* bit 2: second bank on 6-channel chips */
+		CH = OPN->P_CH;
+		CH = &CH[c];
+		if(v&0x10) FM_KEYON(OPN,CH,SLOT1); else FM_KEYOFF(OPN,CH,SLOT1);
+		if(v&0x20) FM_KEYON(OPN,CH,SLOT2); else FM_KEYOFF(OPN,CH,SLOT2);
+		if(v&0x40) FM_KEYON(OPN,CH,SLOT3); else FM_KEYOFF(OPN,CH,SLOT3);
+		if(v&0x80) FM_KEYON(OPN,CH,SLOT4); else FM_KEYOFF(OPN,CH,SLOT4);
+		break;
+	}
+}
+
+/* write a OPN register (0x30-0xff)
+   r : register address; 0x100 is set for the second register bank, which maps to channels 3-5
+   v : value to write
+   Addresses whose channel field is 3 (0xX3,0xX7,0xXB,0xXF) are ignored, and the
+   3-slot frequency registers (0xa8-0xae) only act on the first bank (r < 0x100). */
+static void OPNWriteReg(FM_OPN *OPN, int r, int v)
+{
+	FM_CH *CH;
+	FM_SLOT *SLOT;
+
+	UINT8 c = OPN_CHAN(r);
+
+	if (c == 3) return; /* 0xX3,0xX7,0xXB,0xXF */
+
+	if (r >= 0x100) c+=3;
+
+	CH = OPN->P_CH;
+	CH = &CH[c];
+
+	SLOT = &(CH->SLOT[OPN_SLOT(r)]);
+
+	switch( r & 0xf0 ) {
+	case 0x30:	/* DET , MUL */
+		set_det_mul(&OPN->ST,CH,SLOT,v);
+		break;
+
+	case 0x40:	/* TL */
+		set_tl(CH,SLOT,v);
+		break;
+
+	case 0x50:	/* KS, AR */
+		set_ar_ksr(OPN->type,CH,SLOT,v);
+		break;
+
+	case 0x60:	/* bit7 = AM ENABLE, DR */
+		set_dr(OPN->type, SLOT,v);
+
+		if(OPN->type & TYPE_LFOPAN) /* YM2608/2610/2610B/2612 */
+		{
+			SLOT->AMmask = (v&0x80) ? ~0 : 0;
+		}
+		break;
+
+	case 0x70:	/*     SR */
+		set_sr(OPN->type,SLOT,v);
+		break;
+
+	case 0x80:	/* SL, RR */
+		set_sl_rr(OPN->type,SLOT,v);
+		break;
+
+	case 0x90:	/* SSG-EG */
+		SLOT->ssg  =  v&0x0f;
+
+		/* recalculate EG output */
+		if (SLOT->state > EG_REL)
+		{
+			if ((SLOT->ssg&0x08) && (SLOT->ssgn ^ (SLOT->ssg&0x04)))
+				SLOT->vol_out = ((UINT32)(0x200 - SLOT->volume) & MAX_ATT_INDEX) + SLOT->tl;
+			else
+				SLOT->vol_out = (UINT32)SLOT->volume + SLOT->tl;
+		}
+
+		/* SSG-EG envelope shapes :
+
+        E AtAlH
+        1 0 0 0  \\\\
+
+        1 0 0 1  \___
+
+        1 0 1 0  \/\/
+                  ___
+        1 0 1 1  \
+
+        1 1 0 0  ////
+                  ___
+        1 1 0 1  /
+
+        1 1 1 0  /\/\
+
+        1 1 1 1  /___
+
+        E = SSG-EG enable
+
+        The shapes are generated using Attack, Decay and Sustain phases.
+
+        Each single character in the diagrams above represents this whole
+        sequence:
+
+        - when KEY-ON = 1, normal Attack phase is generated (*without* any
+          difference when compared to normal mode),
+
+        - later, when envelope level reaches minimum level (max volume),
+          the EG switches to Decay phase (which works with bigger steps
+          when compared to normal mode - see below),
+
+        - later when envelope level passes the SL level,
+          the EG switches to Sustain phase (which works with bigger steps
+          when compared to normal mode - see below),
+
+        - finally when envelope level reaches maximum level (min volume),
+          the EG switches to Attack phase again (depends on actual waveform).
+
+        Important is that when switch to Attack phase occurs, the phase counter
+        of that operator will be zeroed-out (as in normal KEY-ON) but not always.
+        (I haven't found the rule for that - perhaps only when the output level is low)
+
+        The difference (when compared to normal Envelope Generator mode) is
+        that the resolution in Decay and Sustain phases is 4 times lower;
+        this results in only 256 steps instead of normal 1024.
+        In other words:
+        when SSG-EG is disabled, the step inside of the EG is one,
+        when SSG-EG is enabled, the step is four (in Decay and Sustain phases).
+
+        Times between the level changes are the same in both modes.
+
+        Important:
+        Decay 1 Level (so called SL) is compared to actual SSG-EG output, so
+        it is the same in both SSG and no-SSG modes, with this exception:
+
+        when the SSG-EG is enabled and is generating rising levels
+        (when the EG output is inverted) the SL will be found at wrong level !!!
+        For example, when SL=02:
+            0 -6 = -6dB in non-inverted EG output
+            96-6 = -90dB in inverted EG output
+        Which means that EG compares its level to SL as usual, and that the
+        output is simply inverted after all.
+
+        The Yamaha's manuals say that AR should be set to 0x1f (max speed).
+        That is not necessary, but then EG will be generating Attack phase.
+
+        */
+		break;
+
+	case 0xa0:
+		switch( OPN_SLOT(r) )
+		{
+		case 0:		/* 0xa0-0xa2 : FNUM1 */
+#ifdef USE_VGM_INIT_SWITCH
+			if (IsVGMInit)
+				OPN->ST.fn_h = CH->block_fnum >> 8;
+#endif
+			{
+				UINT32 fn = (((UINT32)( (OPN->ST.fn_h)&7))<<8) + v;
+				UINT8 blk = OPN->ST.fn_h>>3;
+				/* keyscale code */
+				CH->kcode = (blk<<2) | opn_fktable[fn >> 7];
+				/* phase increment counter */
+				CH->fc = OPN->fn_table[fn*2]>>(7-blk);
+
+				/* store fnum in clear form for LFO PM calculations */
+				CH->block_fnum = (blk<<11) | fn;
+
+				CH->SLOT[SLOT1].Incr=-1;
+			}
+			break;
+		case 1:		/* 0xa4-0xa6 : FNUM2,BLK */
+			OPN->ST.fn_h = v&0x3f;
+#ifdef USE_VGM_INIT_SWITCH
+			if (IsVGMInit)	/* workaround for stupid Kega Fusion init block */
+				CH->block_fnum = (OPN->ST.fn_h << 8) | (CH->block_fnum & 0xFF);
+#endif
+			break;
+		case 2:		/* 0xa8-0xaa : 3CH FNUM1 */
+			if(r < 0x100)
+			{
+				UINT32 fn;
+				UINT8 blk;
+#ifdef USE_VGM_INIT_SWITCH
+				/* kept inside the bank check: for r >= 0x100, c is 3..5 and must not index the SL3 arrays */
+				if (IsVGMInit)
+					OPN->SL3.fn_h = OPN->SL3.block_fnum[c] >> 8;
+#endif
+				fn = (((UINT32)(OPN->SL3.fn_h&7))<<8) + v;
+				blk = OPN->SL3.fn_h>>3;
+				/* keyscale code */
+				OPN->SL3.kcode[c]= (blk<<2) | opn_fktable[fn >> 7];
+				/* phase increment counter */
+				OPN->SL3.fc[c] = OPN->fn_table[fn*2]>>(7-blk);
+				OPN->SL3.block_fnum[c] = (blk<<11) | fn;
+				(OPN->P_CH)[2].SLOT[SLOT1].Incr=-1;
+			}
+			break;
+		case 3:		/* 0xac-0xae : 3CH FNUM2,BLK */
+			if(r < 0x100)
+			{
+				OPN->SL3.fn_h = v&0x3f;
+#ifdef USE_VGM_INIT_SWITCH
+				if (IsVGMInit)
+					OPN->SL3.block_fnum[c] = (OPN->SL3.fn_h << 8) | (OPN->SL3.block_fnum[c] & 0xFF);
+#endif
+			}
+			break;
+		}
+		break;
+
+	case 0xb0:
+		switch( OPN_SLOT(r) )
+		{
+		case 0:		/* 0xb0-0xb2 : FB,ALGO */
+			{
+				unsigned char feedback = ((v>>3)&7);
+				CH->ALGO = v&7;
+				CH->FB   = feedback ? feedback + 6 : 0;
+				setup_connection( OPN, CH, c );
+			}
+			break;
+		case 1:		/* 0xb4-0xb6 : L , R , AMS , PMS (YM2612/YM2610B/YM2610/YM2608) */
+			if( OPN->type & TYPE_LFOPAN)
+			{
+				/* b0-2 PMS */
+				CH->pms = (v & 7) * 32; /* CH->pms = PM depth * 32 (index in lfo_pm_table) */
+
+				/* b4-5 AMS */
+				CH->ams = lfo_ams_depth_shift[(v>>4) & 0x03];
+
+				/* PAN :  b7 = L, b6 = R */
+				OPN->pan[ c*2   ] = (v & 0x80) ? ~0 : 0;
+				OPN->pan[ c*2+1 ] = (v & 0x40) ? ~0 : 0;
+			}
+			break;
+		}
+		break;
+	}
+}
+
+/* build the per-chip detune table, F-number increment table and fn_max for the given frequency base */
+static void init_timetables(FM_OPN *OPN, double freqbase)
+{
+	int fnum;
+	int row, col;
+
+	/* DeTune table: rows 0-3 are scaled from dt_tab, rows 4-7 hold their negatives */
+	for (row = 0; row < 4; row++)
+	{
+		for (col = 0; col < 32; col++)
+		{
+			/* -10 because chip works with 10.10 fixed point, while we use 16.16 */
+			double scaled = ((double)dt_tab[row*32 + col]) * freqbase * (1<<(FREQ_SH-10));
+
+			OPN->ST.dt_tab[row][col]   = (INT32) scaled;
+			OPN->ST.dt_tab[row+4][col] = -OPN->ST.dt_tab[row][col];
+		}
+	}
+
+	/* F-number -> increment counter table (freq table for octave 7).
+	   2048 FNUMs can be generated using FNUM/BLK registers, but LFO works with
+	   one more bit of precision, so 4096 elements are needed.
+	   The correct formula is : F-Number = (144 * fnote * 2^20 / M) / 2^(B-1), where sample clock is M/144;
+	   the increment for one clock sample is FNUM * 2^(B-1) = FNUM * 64 for octave 7.
+	   freqbase is the ratio between the chip frequency and the emulated frequency (can be 1.0). */
+	for (fnum = 0; fnum < 4096; fnum++)
+	{
+		/* OPN phase increment counter = 20bit */
+		/* -10 because chip works with 10.10 fixed point, while we use 16.16 */
+		OPN->fn_table[fnum] = (UINT32)( (double)fnum * 32 * freqbase * (1<<(FREQ_SH-10)) );
+	}
+	/* maximal frequency is required for Phase overflow calculation, register size is 17 bits (Nemesis) */
+	OPN->fn_max = (UINT32)( (double)0x20000 * freqbase * (1<<(FREQ_SH-10)) );
+}
+
+/* derive all prescaler-dependent timing values, then rebuild the time tables */
+static void OPNSetPres(FM_OPN *OPN, int pres, int timer_prescaler, int SSGpres)
+{
+	FM_ST *st = &OPN->ST;
+
+	/* frequency base: (clock / rate) / pres, or 0 while the sample rate is 0 */
+	if (st->rate)
+		st->freqbase = ((double)st->clock / st->rate) / pres;
+	else
+		st->freqbase = 0;
+
+	/* EG is updated every 3 samples; the LFO timer is incremented every sample */
+	OPN->eg_timer_add  = (UINT32)((1<<EG_SH) * st->freqbase);
+	OPN->eg_timer_overflow = ( 3 ) * (1<<EG_SH);
+	OPN->lfo_timer_add  = (UINT32)((1<<LFO_SH) * st->freqbase);
+	st->timer_prescaler = timer_prescaler;	/* Timer base time */
+
+	/* SSG part: the clock is forwarded only when an SSG prescaler is given */
+	if( SSGpres )
+		(*st->SSG->set_clock)( st->param, st->clock * 2 / SSGpres );
+	init_timetables(OPN, st->freqbase);	/* make time tables */
+}
+/* reset the listed per-channel and per-slot fields of `num` channels starting at CH (ST is unused) */
+static void reset_channels( FM_ST *ST , FM_CH *CH , int num )
+{
+	int c,s;
+	(void)ST;
+	for( c = 0 ; c < num ; c++ )
+	{
+		FM_CH *chan = &CH[c];
+		/* only these fields are touched; everything else in the channel keeps its value */
+		chan->mem_value = 0;
+		chan->op1_out[0] = 0;
+		chan->op1_out[1] = 0;
+		chan->fc = 0;
+		for( s = 0 ; s < 4 ; s++ )
+		{
+			FM_SLOT *op = &chan->SLOT[s];
+			op->Incr = -1;	/* -1 marks the phase increment as stale */
+			op->key = 0;
+			op->phase = 0;
+			op->ssg = 0;
+			op->ssgn = 0;
+			op->state= EG_OFF;
+			op->volume = MAX_ATT_INDEX;
+			op->vol_out= MAX_ATT_INDEX;
+		}
+	}
+}
+
+/* initialize generic tables: tl_tab (linear power), sin_tab (logarithmic sinus) and lfo_pm_table; SAVE_SAMPLE builds also open the dump file */
+static void init_tables(void)
+{
+	signed int i,x;
+	signed int n;
+	double o,m;
+
+	/* build Linear Power Table */
+	for (x=0; x<TL_RES_LEN; x++)
+	{
+		m = (1<<16) / pow(2, (x+1) * (ENV_STEP/4.0) / 8.0);
+		m = floor(m);
+
+		/* we never reach (1<<16) here due to the (x+1) */
+		/* result fits within 16 bits at maximum */
+
+		n = (int)m;		/* 16 bits here */
+		n >>= 4;		/* 12 bits here */
+		if (n&1)		/* round to nearest */
+			n = (n>>1)+1;
+		else
+			n = n>>1;
+						/* 11 bits here (rounded) */
+		n <<= 2;		/* 13 bits here (as in real chip) */
+
+		/* even index = positive value, odd index = its negation */
+		/* 14 bits (with sign bit) */
+		tl_tab[ x*2 + 0 ] = n;
+		tl_tab[ x*2 + 1 ] = -tl_tab[ x*2 + 0 ];
+
+		/* one entry in the 'Power' table use the following format, xxxxxyyyyyyyys with:            */
+		/*        s = sign bit                                                                      */
+		/* yyyyyyyy = 8-bits decimal part (0-TL_RES_LEN)                                            */
+		/* xxxxx    = 5-bits integer 'shift' value (0-31) but, since Power table output is 13 bits, */
+		/*            any value above 13 (included) would be discarded.                             */
+		for (i=1; i<13; i++)
+		{
+			tl_tab[ x*2+0 + i*2*TL_RES_LEN ] =  tl_tab[ x*2+0 ]>>i;
+			tl_tab[ x*2+1 + i*2*TL_RES_LEN ] = -tl_tab[ x*2+0 + i*2*TL_RES_LEN ];
+		}
+	}
+
+	/* build Logarithmic Sinus table */
+	for (i=0; i<SIN_LEN; i++)
+	{
+		/* non-standard sinus */
+		m = sin( ((i*2)+1) * M_PI / SIN_LEN ); /* checked against the real chip */
+		/* we never reach zero here due to ((i*2)+1) */
+
+		if (m>0.0)
+			o = 8*log(1.0/m)/log(2.0);	/* convert to 'decibels' */
+		else
+			o = 8*log(-1.0/m)/log(2.0);	/* convert to 'decibels' */
+
+		o = o / (ENV_STEP/4);
+
+		n = (int)(2.0*o);
+		if (n&1)    		/* round to nearest */
+			n = (n>>1)+1;
+		else
+			n = n>>1;
+
+		/* 13-bits (8.5) value is formatted for above 'Power' table */
+		sin_tab[ i ] = n*2 + (m>=0.0? 0: 1 );
+	}
+
+	/* build LFO PM modulation table */
+	for(i = 0; i < 8; i++) /* 8 PM depths */
+	{
+		UINT8 fnum;
+		for (fnum=0; fnum<128; fnum++) /* 7 bits meaningful of F-NUMBER */
+		{
+			UINT8 value;
+			UINT8 step;
+			UINT32 offset_depth = i;
+			UINT32 offset_fnum_bit;
+			UINT32 bit_tmp;
+
+			for (step=0; step<8; step++)
+			{
+				value = 0;
+				for (bit_tmp=0; bit_tmp<7; bit_tmp++) /* 7 bits */
+				{
+					if (fnum & (1<<bit_tmp)) /* only if bit "bit_tmp" is set */
+					{
+						offset_fnum_bit = bit_tmp * 8;
+						value += lfo_pm_output[offset_fnum_bit + offset_depth][step];
+					}
+				}
+				/* 32 steps for LFO PM (sinus) */
+				lfo_pm_table[(fnum*32*8) + (i*32) + step   + 0] = value;
+				lfo_pm_table[(fnum*32*8) + (i*32) +(step^7)+ 8] = value;
+				lfo_pm_table[(fnum*32*8) + (i*32) + step   +16] = -value;
+				lfo_pm_table[(fnum*32*8) + (i*32) +(step^7)+24] = -value;
+			}
+			/* the 8 computed steps are mirrored (step^7) and negated to fill all 32 entries */
+		}
+	}
+
+#ifdef SAVE_SAMPLE
+	sample[0]=fopen("sampsum.pcm","wb");
+#endif
+}
+
+#endif /* BUILD_OPN */
+
+
+/*******************************************************************************/
+/*      YM2612 local section                                                   */
+/*******************************************************************************/
+/* render `frames` stereo frames into buffer (interleaved, two samples per frame); a non-zero mix adds to the buffer instead of overwriting it */
+static void ym2612_generate(void *chip, FMSAMPLE *buffer, int frames, int mix)
+{
+	YM2612 *F2612 = (YM2612 *)chip;
+	FM_CH  *cch = F2612->CH;
+	FMSAMPLE  *dst = buffer;
+	int frame, ch;
+#if RSM_ENABLE
+	FM_ST *st = &F2612->OPN.ST;
+	FMSAMPLE left, right;
+#else
+	FMSAMPLE native[2];
+#endif
+
+	ym2612_pre_generate(chip);
+
+	/* zero-length call: no samples are produced, only the SSG-EG outputs are refreshed */
+	if (!frames)
+	{
+		for (ch = 0; ch < 6; ch++)
+			update_ssg_eg_channel(&cch[ch].SLOT[SLOT1]);
+	}
+
+	for (frame = 0; frame < frames; frame++, dst += 2)
+	{
+#if RSM_ENABLE
+		/* generate native samples until the frame counter drops below rateratio (Copy-Pasta from Nuked) */
+		while (st->framecnt >= st->rateratio)
+		{
+			st->prev_sample[0] = st->cur_sample[0];
+			st->prev_sample[1] = st->cur_sample[1];
+			ym2612_generate_one_native(chip, st->cur_sample);
+			st->framecnt -= st->rateratio;
+		}
+
+		/* linear interpolation between the previous and the current native sample */
+		left  = (FMSAMPLE)((st->prev_sample[0] * (st->rateratio - st->framecnt)
+				  + st->cur_sample[0] * st->framecnt) / st->rateratio);
+		right = (FMSAMPLE)((st->prev_sample[1] * (st->rateratio - st->framecnt)
+				  + st->cur_sample[1] * st->framecnt) / st->rateratio);
+		if (mix)
+		{
+			dst[0] += left;
+			dst[1] += right;
+		}
+		else
+		{
+			dst[0] = left;
+			dst[1] = right;
+		}
+		st->framecnt += 1 << RSM_FRAC;
+#else
+		/* no resampling: exactly one native sample per output frame */
+		if (!mix)
+		{
+			ym2612_generate_one_native(chip, dst);
+			continue;
+		}
+		ym2612_generate_one_native(chip, native);
+		dst[0] += native[0];
+		dst[1] += native[1];
+#endif
+	}
+	/* ym2612_post_generate(chip, frames); */
+}
+/* refresh PG and EG: recompute stale phase increments / EG rates of all six channels */
+void ym2612_pre_generate(void *chip)
+{
+	YM2612 *F2612 = (YM2612 *)chip;
+	FM_OPN *OPN   = &F2612->OPN;
+	FM_CH  *ch3   = &F2612->CH[2];
+
+	refresh_fc_eg_chan( OPN, &F2612->CH[0] );
+	refresh_fc_eg_chan( OPN, &F2612->CH[1] );
+	if( !(OPN->ST.mode & 0xc0) )
+	{
+		/* mode bits 0xc0 clear: CH[2] is refreshed like every other channel */
+		refresh_fc_eg_chan( OPN, ch3 );
+	}
+	else if( ch3->SLOT[SLOT1].Incr==-1 )
+	{
+		/* 3SLOT MODE: SLOT1-3 take fc/kcode from SL3, SLOT4 from the channel itself */
+		refresh_fc_eg_slot(OPN, &ch3->SLOT[SLOT1] , OPN->SL3.fc[1] , OPN->SL3.kcode[1] );
+		refresh_fc_eg_slot(OPN, &ch3->SLOT[SLOT2] , OPN->SL3.fc[2] , OPN->SL3.kcode[2] );
+		refresh_fc_eg_slot(OPN, &ch3->SLOT[SLOT3] , OPN->SL3.fc[0] , OPN->SL3.kcode[0] );
+		refresh_fc_eg_slot(OPN, &ch3->SLOT[SLOT4] , ch3->fc , ch3->kcode );
+	}
+	refresh_fc_eg_chan( OPN, &F2612->CH[3] );
+	refresh_fc_eg_chan( OPN, &F2612->CH[4] );
+	refresh_fc_eg_chan( OPN, &F2612->CH[5] );
+}
+/* Produce one stereo sample: buffer[0] = WaveL/2, buffer[1] = WaveR/2. Also advances the LFO, the envelope generator, timer A and the CSM key state. */
+void ym2612_generate_one_native(void *chip, FMSAMPLE buffer[])
+{
+	YM2612 *F2612 = (YM2612 *)chip;
+	FM_OPN *OPN   = &F2612->OPN;
+	INT32  *out_fm = OPN->out_fm;
+	FM_CH  *cch = F2612->CH;
+	INT32 dacout;
+	int lt,rt;
+	/* a muted DAC contributes silence */
+	if (! F2612->MuteDAC)
+		dacout = F2612->dacout;
+	else
+		dacout = 0;
+
+	/* clear outputs */
+	out_fm[0] = 0;
+	out_fm[1] = 0;
+	out_fm[2] = 0;
+	out_fm[3] = 0;
+	out_fm[4] = 0;
+	out_fm[5] = 0;
+
+	/* update SSG-EG output */
+	update_ssg_eg_channel(&cch[0].SLOT[SLOT1]);
+	update_ssg_eg_channel(&cch[1].SLOT[SLOT1]);
+	update_ssg_eg_channel(&cch[2].SLOT[SLOT1]);
+	update_ssg_eg_channel(&cch[3].SLOT[SLOT1]);
+	update_ssg_eg_channel(&cch[4].SLOT[SLOT1]);
+	update_ssg_eg_channel(&cch[5].SLOT[SLOT1]);
+
+	/* calculate FM */
+	if (! F2612->dac_test)
+	{
+		chan_calc(F2612, OPN, &cch[0]);
+		chan_calc(F2612, OPN, &cch[1]);
+		chan_calc(F2612, OPN, &cch[2]);
+		chan_calc(F2612, OPN, &cch[3]);
+		chan_calc(F2612, OPN, &cch[4]);
+		if( F2612->dacen )
+			*cch[5].connect4 += dacout;	/* add to the accumulator connect4 points at; the pointer itself must not move */
+		else
+			chan_calc(F2612, OPN, &cch[5]);
+	}
+	else
+	{
+		out_fm[0] = out_fm[1] = dacout;
+		out_fm[2] = out_fm[3] = dacout;
+		out_fm[5] = dacout;
+	}
+
+	/* advance LFO */
+	advance_lfo(OPN);
+
+	/* advance envelope generator */
+	OPN->eg_timer += OPN->eg_timer_add;
+	while (OPN->eg_timer >= OPN->eg_timer_overflow)
+	{
+		/* reset EG timer */
+		OPN->eg_timer -= OPN->eg_timer_overflow;
+		/* increment EG counter */
+		OPN->eg_cnt++;
+		/* EG counter is 12-bit only and zero value is skipped (verified on real hardware) */
+		if (OPN->eg_cnt == 4096)
+			OPN->eg_cnt = 1;
+
+		/* advance envelope generator */
+		advance_eg_channel(OPN, &cch[0].SLOT[SLOT1]);
+		advance_eg_channel(OPN, &cch[1].SLOT[SLOT1]);
+		advance_eg_channel(OPN, &cch[2].SLOT[SLOT1]);
+		advance_eg_channel(OPN, &cch[3].SLOT[SLOT1]);
+		advance_eg_channel(OPN, &cch[4].SLOT[SLOT1]);
+		advance_eg_channel(OPN, &cch[5].SLOT[SLOT1]);
+	}
+
+	/*fprintf(hFile, "%u", FileSample, out_fm[0]);
+	for (lt = 0; lt < 6; lt ++)
+		fprintf(hFile, "\t%d", out_fm[lt]);
+	fprintf(hFile, "\n");
+	FileSample ++;*/
+	/* clamp every channel output to [-8192, 8192] */
+	if (out_fm[0] > 8192) out_fm[0] = 8192;
+	else if (out_fm[0] < -8192) out_fm[0] = -8192;
+	if (out_fm[1] > 8192) out_fm[1] = 8192;
+	else if (out_fm[1] < -8192) out_fm[1] = -8192;
+	if (out_fm[2] > 8192) out_fm[2] = 8192;
+	else if (out_fm[2] < -8192) out_fm[2] = -8192;
+	if (out_fm[3] > 8192) out_fm[3] = 8192;
+	else if (out_fm[3] < -8192) out_fm[3] = -8192;
+	if (out_fm[4] > 8192) out_fm[4] = 8192;
+	else if (out_fm[4] < -8192) out_fm[4] = -8192;
+	if (out_fm[5] > 8192) out_fm[5] = 8192;
+	else if (out_fm[5] < -8192) out_fm[5] = -8192;
+
+	/* 6-channels mixing  */
+	lt  = ((out_fm[0]>>0) & OPN->pan[0]);
+	rt  = ((out_fm[0]>>0) & OPN->pan[1]);
+	lt += ((out_fm[1]>>0) & OPN->pan[2]);
+	rt += ((out_fm[1]>>0) & OPN->pan[3]);
+	lt += ((out_fm[2]>>0) & OPN->pan[4]);
+	rt += ((out_fm[2]>>0) & OPN->pan[5]);
+	lt += ((out_fm[3]>>0) & OPN->pan[6]);
+	rt += ((out_fm[3]>>0) & OPN->pan[7]);
+	if (! F2612->dac_test)
+	{
+		lt += ((out_fm[4]>>0) & OPN->pan[8]);
+		rt += ((out_fm[4]>>0) & OPN->pan[9]);
+	}
+	else
+	{
+		lt += dacout;	/* DAC test: CH[4] contributes dacout to both sides, not gated by pan */
+		rt += dacout;
+	}
+	lt += ((out_fm[5]>>0) & OPN->pan[10]);
+	rt += ((out_fm[5]>>0) & OPN->pan[11]);
+
+	/* Limit( lt, MAXOUT, MINOUT ); */
+	/* Limit( rt, MAXOUT, MINOUT ); */
+
+	#ifdef SAVE_SAMPLE
+		SAVE_ALL_CHANNELS
+	#endif
+
+	/* buffering: WaveOutMode bit 0 updates WaveL, bit 1 updates WaveR; any mode other than 0x03 has both bits flipped afterwards */
+	if (F2612->WaveOutMode & 0x01)
+		F2612->WaveL = lt;
+	if (F2612->WaveOutMode & 0x02)
+		F2612->WaveR = rt;
+	if (F2612->WaveOutMode ^ 0x03)
+		F2612->WaveOutMode ^= 0x03;
+
+	buffer[0] = (FMSAMPLE)(F2612->WaveL / 2);
+	buffer[1] = (FMSAMPLE)(F2612->WaveR / 2);
+
+	/* CSM mode: if CSM Key ON has occurred, CSM Key OFF needs to be sent     */
+	/* only if Timer A does not overflow again (i.e CSM Key ON not set again) */
+	OPN->SL3.key_csm <<= 1;
+
+	/* timer A control */
+	/* INTERNAL_TIMER_A( &OPN->ST , cch[2] ) */
+	{
+		if( OPN->ST.TAC &&  (OPN->ST.timer_handler==0) )
+			if( (OPN->ST.TAC -= (int)(OPN->ST.freqbase*4096)) <= 0 )
+			{
+				TimerAOver( &OPN->ST );
+				/* CSM mode total level latch and auto key on */
+				if( OPN->ST.mode & 0x80 )
+					CSMKeyControll( OPN, &cch[2] );
+			}
+	}
+
+	/* CSM Mode Key ON still disabled */
+	if (OPN->SL3.key_csm & 2)
+	{
+		/* CSM Mode Key OFF (verified by Nemesis on real hardware) */
+		FM_KEYOFF_CSM(&cch[2],SLOT1);
+		FM_KEYOFF_CSM(&cch[2],SLOT2);
+		FM_KEYOFF_CSM(&cch[2],SLOT3);
+		FM_KEYOFF_CSM(&cch[2],SLOT4);
+		OPN->SL3.key_csm = 0;
+	}
+}
+/* disabled code: passes the chip's FM_ST and `length` to INTERNAL_TIMER_B (presumably timer B handling per batch - confirm before re-enabling) */
+#if 0
+void ym2612_post_generate(void *chip, int length)
+{
+	YM2612 *F2612 = (YM2612 *)chip;
+	/* timer B control */
+	INTERNAL_TIMER_B(&F2612->OPN.ST, length);
+}
+#endif
+
+#ifdef __STATE_H__
+/* after a state load: rebuild DAC state and replay the saved REGS image through OPNWriteReg for both banks; NULL chip is a no-op */
+void ym2612_postload(void *chip)
+{
+	YM2612 *F2612 = (YM2612 *)chip;
+	int r;
+
+	if (!chip)
+		return;
+
+	/* DAC data & port: same registers ym2612_write() uses (0x2a data, 0x2b enable) */
+	F2612->dacout = ((int)F2612->REGS[0x2a] - 0x80) << 6;	/* level unknown */
+	F2612->dacen  = F2612->REGS[0x2b] & 0x80;
+	/* OPN registers: DT / MULTI , TL , KS / AR , AMON / DR , SR , SL / RR , SSG-EG (0x30-0x9e inclusive) */
+	for(r=0x30;r<=0x9e;r++)
+	{
+		if((r&3) == 3) continue;	/* 0xX3,0xX7,0xXB,0xXF are ignored by OPNWriteReg */
+		OPNWriteReg(&F2612->OPN,r,F2612->REGS[r]);
+		OPNWriteReg(&F2612->OPN,r|0x100,F2612->REGS[r|0x100]);
+	}
+	/* FB / CONNECT (0xb0-0xb2) , L / R / AMS / PMS (0xb4-0xb6 inclusive) */
+	for(r=0xb0;r<=0xb6;r++)
+	{
+		if((r&3) == 3) continue;
+		OPNWriteReg(&F2612->OPN,r,F2612->REGS[r]);
+		OPNWriteReg(&F2612->OPN,r|0x100,F2612->REGS[r|0x100]);
+	}
+	/* channels */
+	/*FM_channel_postload(F2612->CH,6);*/
+}
+/* register the chip's save-state items: raw REGS, the FM_ST block, the six channels, the 3-slot data and addr_A1 */
+static void YM2612_save_state(YM2612 *F2612, running_device *device)
+{
+	state_save_register_device_item_array(device, 0, F2612->REGS);
+	FMsave_state_st(device,&F2612->OPN.ST);
+	FMsave_state_channel(device,F2612->CH,6);
+	/* 3slots */
+	state_save_register_device_item_array(device, 0, F2612->OPN.SL3.fc);
+	state_save_register_device_item(device, 0, F2612->OPN.SL3.fn_h);
+	state_save_register_device_item_array(device, 0, F2612->OPN.SL3.kcode);
+	/* address register1 */
+	state_save_register_device_item(device, 0, F2612->addr_A1);
+}
+#endif /* __STATE_H__ */
+
+/* initialize YM2612 emulator(s): returns a zero-filled, configured chip, or NULL on bad arguments / allocation failure */
+static void * ym2612_init(void *param, int clock, int rate,
+				FM_TIMERHANDLER timer_handler,FM_IRQHANDLER IRQHandler)
+{
+	YM2612 *F2612;
+
+	/* Forbid zero clock and sample rate */
+	if (clock <= 0 || rate <= 0)
+		return NULL;
+
+	/* allocate extend state space, then clear it */
+	/* F2612 = auto_alloc_clear(device->machine, YM2612); */
+	F2612 = (YM2612 *)malloc(sizeof(*F2612));
+	if (!F2612)
+		return NULL;
+	memset(F2612, 0x00, sizeof(*F2612));
+	/* allocate total level table (128kb space) */
+	init_tables();
+
+	F2612->OPN.ST.param = param;
+	F2612->OPN.type = TYPE_YM2612;
+	F2612->OPN.P_CH = F2612->CH;
+	/* F2612->OPN.ST.device = device; */
+	F2612->OPN.ST.clock = clock;
+	/* Extend handler */
+	F2612->OPN.ST.timer_handler = timer_handler;
+	F2612->OPN.ST.IRQ_Handler   = IRQHandler;
+	/* F2612->OPN.ST.irq = 0; */
+	/* F2612->OPN.ST.status = 0; */
+#if RSM_ENABLE
+	/* resampler: fixed internal rate; rateratio is (144 * rate / clock) in RSM_FRAC fixed point */
+	F2612->OPN.ST.rate = 53267;
+	F2612->OPN.ST.rateratio = (INT32)(UINT32)((((UINT64)144 * rate) << RSM_FRAC) / clock);
+	F2612->OPN.ST.framecnt = 1 << RSM_FRAC;
+	memset(&(F2612->OPN.ST.cur_sample), 0x00, sizeof(FMSAMPLE) * 2);
+	memset(&(F2612->OPN.ST.prev_sample), 0x00, sizeof(FMSAMPLE) * 2);
+#else
+	F2612->OPN.ST.rate = rate;
+#endif
+
+	/* output mode: 0x01 when the PseudoSt flag is set, 0x03 otherwise */
+	F2612->WaveOutMode = PseudoSt ? 0x01 : 0x03;
+	/*hFile = fopen("YM2612.log", "wt");
+	fprintf(hFile, "Clock: %d, Sample Rate: %d\n", clock, rate);
+	fprintf(hFile, "Sample\tCh 0\tCh 1\tCh 2\tCh 3\tCh 4\tCh 5\n");
+	FileSample = 0;*/
+
+#ifdef __STATE_H__
+	YM2612_save_state(F2612, device);
+#endif
+	return F2612;
+}
+
+/* shut down emulator: run the table teardown, then release the chip state */
+static void ym2612_shutdown(void *chip)
+{
+	/* fclose(hFile); */
+
+	FMCloseTable();
+
+	/* auto_free(F2612->OPN.ST.device->machine, F2612); */
+	free(chip);	/* chip is the YM2612 block allocated by ym2612_init() */
+}
+
+/* reset one chip: recompute the timing tables, clear timers/LFO/EG state and the REGS image, then write default values to the registers */
+static void ym2612_reset_chip(void *chip)
+{
+	int i;
+	YM2612 *F2612 = (YM2612 *)chip;
+	FM_OPN *OPN   = &F2612->OPN;
+
+	OPNSetPres( OPN, 6*24, 6*24, 0);
+	/* status clear */
+	FM_IRQMASK_SET(&OPN->ST,0x03);
+	FM_BUSY_CLEAR(&OPN->ST);
+	/* OPNWriteMode(OPN,0x27,0x30); */ /* mode 0 , timer reset */
+
+#if RSM_ENABLE
+	/* Resampler's state */
+	F2612->OPN.ST.framecnt = 1 << RSM_FRAC;
+	memset(&(F2612->OPN.ST.cur_sample), 0x00, sizeof(FMSAMPLE) * 2);
+	memset(&(F2612->OPN.ST.prev_sample), 0x00, sizeof(FMSAMPLE) * 2);
+#endif
+	/* envelope generator timer and counter */
+	OPN->eg_timer = 0;
+	OPN->eg_cnt   = 0;
+	/* LFO: same LFO_AM / LFO_PM values OPNWriteMode() sets when the LFO is disabled */
+	OPN->lfo_timer = 0;
+	OPN->lfo_cnt   = 0;
+	OPN->LFO_AM    = 126;
+	OPN->LFO_PM    = 0;
+	/* timer A / timer B counters */
+	OPN->ST.TAC    = 0;
+	OPN->ST.TBC    = 0;
+
+	OPN->SL3.key_csm = 0;
+
+	OPN->ST.status = 0;
+	OPN->ST.mode = 0;
+
+	memset(F2612->REGS, 0x00, sizeof(UINT8) * 512);
+	/* mode registers: LFO off (0x22), 0x30 to the mode/timer control register (0x27), timers B and A zeroed */
+	OPNWriteMode(OPN,0x22,0x00);
+
+	OPNWriteMode(OPN,0x27,0x30);
+	OPNWriteMode(OPN,0x26,0x00);
+	OPNWriteMode(OPN,0x25,0x00);
+	OPNWriteMode(OPN,0x24,0x00);
+
+	reset_channels( &OPN->ST , &F2612->CH[0] , 6 );
+	/* 0xb4-0xb6 get 0xc0 (both pan bits set), every other register down to 0x30 gets 0, in both banks */
+	for(i = 0xb6 ; i >= 0xb4 ; i-- )
+	{
+		OPNWriteReg(OPN,i      ,0xc0);
+		OPNWriteReg(OPN,i|0x100,0xc0);
+	}
+	for(i = 0xb2 ; i >= 0x30 ; i-- )
+	{
+		OPNWriteReg(OPN,i      ,0);
+		OPNWriteReg(OPN,i|0x100,0);
+	}
+
+	/* DAC mode clear */
+	F2612->dacen = 0;
+	F2612->dac_test = 0;
+	F2612->dacout = 0;
+	/* a WaveOutMode of 0x02 is brought back to 0x01; other values are kept */
+	if (F2612->WaveOutMode == 0x02)
+		F2612->WaveOutMode >>= 1;
+}
+
+/* YM2612 port write; returns the current OPN.ST.irq value            */
+/* chip = chip instance (a YM2612 *)                                   */
+/* a = port: (a&3) 0/2 = address port 0/1, 1/3 = data port 0/1         */
+/* v = value written to that port                                      */
+static int ym2612_write(void *chip, int a, UINT8 v)
+{
+	YM2612 *F2612 = (YM2612 *)chip;
+	int addr;
+
+	v &= 0xff;	/* adjust to 8 bit bus */
+
+	switch( a&3)
+	{
+	case 0:	/* address port 0 */
+		F2612->OPN.ST.address = v;
+		F2612->addr_A1 = 0;	/* remember that the address was latched through port 0 */
+		break;
+
+	case 1:	/* data port 0    */
+		if (F2612->addr_A1 != 0)
+			break;	/* address came in through port 1: ignore the write (verified on real YM2608) */
+
+		addr = F2612->OPN.ST.address;
+		F2612->REGS[addr] = v;	/* keep a shadow copy of the bank 0 register */
+		switch( addr & 0xf0 )
+		{
+		case 0x20:	/* 0x20-0x2f Mode */
+			switch( addr )
+			{
+			case 0x2a:	/* DAC data (YM2612) */
+				ym2612_update_one(chip, DUMMYBUF, 0);
+				F2612->dacout = ((int)v - 0x80) * 64;	/* level unknown; *64 rather than <<6, which is undefined for negative values */
+				break;
+			case 0x2b:	/* DAC Sel  (YM2612) */
+				/* b7 = dac enable */
+				F2612->dacen = v & 0x80;
+				break;
+			case 0x2C:	/* undocumented: DAC Test Reg */
+				/* b5 = volume enable */
+				F2612->dac_test = v & 0x20;
+				break;
+			default:	/* OPN section */
+				/* ym2612_update_req(F2612->OPN.ST.param); */
+				ym2612_update_one(chip, DUMMYBUF, 0);
+				/* write register */
+				OPNWriteMode(&(F2612->OPN),addr,v);
+			}
+			break;
+		default:	/* 0x30-0xff OPN section */
+			ym2612_update_one(chip, DUMMYBUF, 0);
+			/* write register */
+			OPNWriteReg(&(F2612->OPN),addr,v);
+		}
+		break;
+
+	case 2:	/* address port 1 */
+		F2612->OPN.ST.address = v;
+		F2612->addr_A1 = 1;	/* remember that the address was latched through port 1 */
+		break;
+
+	case 3:	/* data port 1    */
+		if (F2612->addr_A1 != 1)
+			break;	/* address came in through port 0: ignore the write (verified on real YM2608) */
+
+		addr = F2612->OPN.ST.address;
+		F2612->REGS[addr | 0x100] = v;	/* bank 1 registers live at offset 0x100 in the shadow copy */
+		ym2612_update_one(chip, DUMMYBUF, 0);
+		OPNWriteReg(&(F2612->OPN),addr | 0x100,v);
+		break;
+	}
+	return F2612->OPN.ST.irq;
+}
+
+#if 0
+static UINT8 ym2612_read(void *chip,int a)
+{
+	YM2612 *instance = (YM2612 *)chip;
+	const int port = a & 3;
+
+	/* Port 0 is the status port. */
+	if (port == 0)
+		return FM_STATUS_FLAG(&instance->OPN.ST);
+
+	/* Ports 1-3 are unmapped; they read back the same status flags. */
+	if (port >= 1 && port <= 3)
+		return FM_STATUS_FLAG(&instance->OPN.ST);
+
+	/* Not reached: (a & 3) is always within 0..3. */
+	return 0;
+}
+
+static int ym2612_timer_over(void *chip,int c)
+{
+	YM2612 *instance = (YM2612 *)chip;
+	FM_OPN *opn = &instance->OPN;
+
+	if (c != 0)
+	{	/* Timer B */
+		TimerBOver(&opn->ST);
+		return opn->ST.irq;
+	}
+
+	/* Timer A: run a zero-length update first, then the timer handler */
+	ym2612_update_one(chip, DUMMYBUF, 0);
+	TimerAOver(&opn->ST);
+
+	/* CSM mode (mode bits 7-6 == 10): key control is applied to CH[2] */
+	if ((opn->ST.mode & 0xc0) == 0x80)
+		CSMKeyControll(opn, &(instance->CH[2]));
+
+	return opn->ST.irq;
+}
+#endif
+
+/* Apply a mute mask: bits 0-5 set Muted on CH[0]-CH[5], bit 6 sets MuteDAC. */
+static void ym2612_set_mutemask(void *chip, UINT32 MuteMask)
+{
+	YM2612 *instance = (YM2612 *)chip;
+	UINT32 bits = MuteMask;
+	int ch;
+
+	for (ch = 0; ch < 6; ch++, bits >>= 1)
+		instance->CH[ch].Muted = bits & 0x01;
+	instance->MuteDAC = bits & 0x01;	/* bits now equals MuteMask >> 6 */
+}
+#if 0
+static void ym2612_setoptions(UINT8 Flags)
+{
+	/* Bit 2 of Flags becomes the global PseudoSt switch (0 or 1),
+	   which ym2612_init consults when it picks WaveOutMode. */
+	PseudoSt = ((Flags & 0x04) != 0) ? 1 : 0;
+}
+#endif
+
+} // Ym2612_MameImpl
+
+
+Ym2612_MAME_Emu::Ym2612_MAME_Emu() : impl( 0 ) { } // no chip exists until set_rate() is called
+
+Ym2612_MAME_Emu::~Ym2612_MAME_Emu()
+{
+	if ( impl != 0 ) { Ym2612_MameImpl::ym2612_shutdown( impl ); } // free the chip created by set_rate(), if any
+}
+
+const char *Ym2612_MAME_Emu::set_rate(double sample_rate, double clock_rate)
+{
+	const int clock_hz = static_cast<int>(clock_rate);
+	const int rate_hz = static_cast<int>(sample_rate);
+	if ( impl ) Ym2612_MameImpl::ym2612_shutdown( impl ); // drop any previous chip before creating a new one
+	impl = Ym2612_MameImpl::ym2612_init( NULL, clock_hz, rate_hz, NULL, NULL );
+	return impl ? 0 : "Out of memory";
+}
+
+void Ym2612_MAME_Emu::reset() // does nothing until set_rate() has created a chip
+{
+	if ( impl != 0 ) { Ym2612_MameImpl::ym2612_reset_chip( impl ); }
+}
+
+void Ym2612_MAME_Emu::mute_voices(int mask) // does nothing until set_rate() has created a chip
+{
+	if ( impl != 0 ) { Ym2612_MameImpl::ym2612_set_mutemask( impl, mask ); }
+}
+
+void Ym2612_MAME_Emu::write0(int addr, int data)
+{
+	const uint8_t reg = static_cast<uint8_t>(addr), value = static_cast<uint8_t>(data);
+	if ( impl ) Ym2612_MameImpl::ym2612_write( impl, 0, reg );   // port 0: latch the register address
+	if ( impl ) Ym2612_MameImpl::ym2612_write( impl, 1, value ); // port 1: write the data byte
+}
+
+void Ym2612_MAME_Emu::write1(int addr, int data)
+{
+	const uint8_t reg = static_cast<uint8_t>(addr), value = static_cast<uint8_t>(data);
+	if ( impl ) Ym2612_MameImpl::ym2612_write( impl, 2, reg );   // port 2: latch the register address
+	if ( impl ) Ym2612_MameImpl::ym2612_write( impl, 3, value ); // port 3: write the data byte
+}
+
+void Ym2612_MAME_Emu::run(int pair_count, Ym2612_MAME_Emu::sample_t *out) // does nothing until set_rate() has created a chip
+{
+	if ( impl != 0 ) { Ym2612_MameImpl::ym2612_generate( impl, out, pair_count, 0 ); }
+}
diff --git a/libraries/game-music-emu/gme/Ym2612_MAME.h b/libraries/game-music-emu/gme/Ym2612_MAME.h
new file mode 100644
index 000000000..03831065a
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ym2612_MAME.h
@@ -0,0 +1,38 @@
+// YM2612 FM sound chip emulator interface
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef YM2612_EMU_H
+#define YM2612_EMU_H
+
+typedef void Ym2612_MAME_Impl;
+
+class Ym2612_MAME_Emu  {
+	Ym2612_MAME_Impl* impl; // opaque chip handle, 0 until set_rate() succeeds. NOTE(review): freed by the destructor and no copy control is declared here - confirm instances are never copied
+public:
+	Ym2612_MAME_Emu();
+	~Ym2612_MAME_Emu();
+
+	// Set output sample rate and chip clock rates, in Hz. Returns 0 on
+	// success, or an error string ("Out of memory") if the chip cannot be created.
+	const char* set_rate( double sample_rate, double clock_rate );
+
+	// Reset to power-up state (no effect before a successful set_rate)
+	void reset();
+
+	// Mute voice n if bit n (1 << n) of mask is set; bit 6 mutes the DAC
+	enum { channel_count = 6 };
+	void mute_voices( int mask );
+
+	// Write addr to register 0 then data to register 1 (both truncated to 8 bits)
+	void write0( int addr, int data );
+
+	// Write addr to register 2 then data to register 3 (both truncated to 8 bits)
+	void write1( int addr, int data );
+
+	// Run and add pair_count samples into current output buffer contents
+	typedef short sample_t;
+	enum { out_chan_count = 2 }; // stereo
+	void run( int pair_count, sample_t* out );
+};
+
+#endif
diff --git a/libraries/game-music-emu/gme/Ym2612_Nuked.cpp b/libraries/game-music-emu/gme/Ym2612_Nuked.cpp
new file mode 100644
index 000000000..fc49ac690
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ym2612_Nuked.cpp
@@ -0,0 +1,1872 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+// Based on Nuked OPN2 ym3438.c and ym3438.h
+
+#include "Ym2612_Nuked.h"
+
+/*
+ * Copyright (C) 2017 Alexey Khokholov (Nuke.YKT)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *
+ *  Nuked OPN2(Yamaha YM3438) emulator.
+ *  Thanks:
+ *      Silicon Pr0n:
+ *          Yamaha YM3438 decap and die shot(digshadow).
+ *      OPLx decapsulated(Matthew Gambrell, Olli Niemitalo):
+ *          OPL2 ROMs.
+ *
+ * version: 1.0.7
+ */
+
+
+#include <stdint.h>
+#include <string.h>
+
+typedef uintptr_t       Bitu;   /* unsigned, pointer-sized */
+typedef intptr_t        Bits;   /* signed, pointer-sized */
+typedef uint64_t        Bit64u;
+typedef int64_t         Bit64s;
+typedef uint32_t        Bit32u;
+typedef int32_t         Bit32s;
+typedef uint16_t        Bit16u;
+typedef int16_t         Bit16s;
+typedef uint8_t         Bit8u;
+typedef int8_t Bit8s;
+
+namespace Ym2612_NukedImpl
+{
+
+/* EXTRA - NOTE(review): this tag apparently marks additions to the upstream Nuked OPN2 source; confirm */
+#define RSM_FRAC 10
+#define OPN_WRITEBUF_SIZE 2048 /* number of entries in ym3438_t::writebuf */
+#define OPN_WRITEBUF_DELAY 15
+
+enum { /* chip variants; chip_type below starts out as ym3438_type_discrete */
+    ym3438_type_discrete = 0,   /* Discrete YM3438 (Teradrive)          */
+    ym3438_type_asic = 1,       /* ASIC YM3438 (MD1 VA7, MD2, MD3, etc) */
+    ym3438_type_ym2612 = 2      /* YM2612 (MD1, MD2 VA2)                */
+};
+
+/* EXTRA: one entry of the ym3438_t::writebuf array */
+typedef struct _opn2_writebuf {
+    Bit64u time;
+    Bit8u port;
+    Bit8u data;
+    Bit8u reserved[6]; /* NOTE(review): presumably padding that rounds the entry up to 16 bytes - confirm */
+} opn2_writebuf;
+
+typedef struct /* complete state of one emulated chip */
+{
+    Bit32u cycles;
+    Bit32u channel;
+    Bit16s mol, mor;
+    /* IO */
+    Bit16u write_data;
+    Bit8u write_a; /* shifted left once per OPN2_DoIO call; low bits 01 raise write_a_en */
+    Bit8u write_d; /* shifted left once per OPN2_DoIO call; low bits 01 raise write_d_en */
+    Bit8u write_a_en;
+    Bit8u write_d_en;
+    Bit8u write_busy;
+    Bit8u write_busy_cnt; /* kept to 5 bits (masked with 0x1f) */
+    Bit8u write_fm_address;
+    Bit8u write_fm_data;
+    Bit8u write_fm_mode_a;
+    Bit16u address; /* latched register address; bit 0x100 is compared against the bank bit of op_offset/ch_offset */
+    Bit8u data;
+    Bit8u pin_test_in;
+    Bit8u pin_irq;
+    Bit8u busy;
+    /* LFO */
+    Bit8u lfo_en; /* 0x7f when bit 3 of register 0x22 is set, otherwise 0 */
+    Bit8u lfo_freq; /* low 3 bits of register 0x22 */
+    Bit8u lfo_pm;
+    Bit8u lfo_am;
+    Bit8u lfo_cnt;
+    Bit8u lfo_inc;
+    Bit8u lfo_quotient;
+    /* Phase generator */
+    Bit16u pg_fnum;
+    Bit8u pg_block;
+    Bit8u pg_kcode;
+    Bit32u pg_inc[24]; /* masked to 20 bits */
+    Bit32u pg_phase[24]; /* masked to 20 bits */
+    Bit8u pg_reset[24];
+    Bit32u pg_read;
+    /* Envelope generator */
+    Bit8u eg_cycle;
+    Bit8u eg_cycle_stop;
+    Bit8u eg_shift;
+    Bit8u eg_shift_lock;
+    Bit8u eg_timer_low_lock;
+    Bit16u eg_timer;
+    Bit8u eg_timer_inc;
+    Bit16u eg_quotient;
+    Bit8u eg_custom_timer; /* set from test register 0x2c: bit 6 set and bit 7 clear */
+    Bit8u eg_rate;
+    Bit8u eg_ksv;
+    Bit8u eg_inc;
+    Bit8u eg_ratemax;
+    Bit8u eg_sl[2];
+    Bit8u eg_lfo_am;
+    Bit8u eg_tl[2];
+    Bit8u eg_state[24]; /* one of the eg_num_* values */
+    Bit16u eg_level[24];
+    Bit16u eg_out[24];
+    Bit8u eg_kon[24];
+    Bit8u eg_kon_csm[24];
+    Bit8u eg_kon_latch[24];
+    Bit8u eg_csm_mode[24];
+    Bit8u eg_ssg_enable[24]; /* bit 3 of ssg_eg, refreshed by OPN2_EnvelopeSSGEG */
+    Bit8u eg_ssg_pgrst_latch[24];
+    Bit8u eg_ssg_repeat_latch[24];
+    Bit8u eg_ssg_hold_up_latch[24];
+    Bit8u eg_ssg_dir[24];
+    Bit8u eg_ssg_inv[24];
+    Bit32u eg_read[2];
+    Bit8u eg_read_inc;
+    /* FM */
+    Bit16s fm_op1[6][2];
+    Bit16s fm_op2[6];
+    Bit16s fm_out[24];
+    Bit16u fm_mod[24];
+    /* Channel */
+    Bit16s ch_acc[6];
+    Bit16s ch_out[6];
+    Bit16s ch_lock;
+    Bit8u ch_lock_l;
+    Bit8u ch_lock_r;
+    Bit16s ch_read;
+    /* Timer */
+    Bit16u timer_a_cnt;
+    Bit16u timer_a_reg; /* 10 bits: register 0x24 supplies bits 9-2, register 0x25 bits 1-0 */
+    Bit8u timer_a_load_lock;
+    Bit8u timer_a_load;
+    Bit8u timer_a_enable;
+    Bit8u timer_a_reset;
+    Bit8u timer_a_load_latch;
+    Bit8u timer_a_overflow_flag;
+    Bit8u timer_a_overflow;
+
+    Bit16u timer_b_cnt;
+    Bit8u timer_b_subcnt;
+    Bit16u timer_b_reg; /* 8 bits, from register 0x26 */
+    Bit8u timer_b_load_lock;
+    Bit8u timer_b_load;
+    Bit8u timer_b_enable;
+    Bit8u timer_b_reset;
+    Bit8u timer_b_load_latch;
+    Bit8u timer_b_overflow_flag;
+    Bit8u timer_b_overflow;
+
+    /* Register set */
+    Bit8u mode_test_21[8]; /* one entry per bit of register 0x21 */
+    Bit8u mode_test_2c[8]; /* one entry per bit of register 0x2c */
+    Bit8u mode_ch3; /* bits 7-6 of register 0x27 */
+    Bit8u mode_kon_channel; /* 0-5, or 0xff when register 0x28 names an invalid channel */
+    Bit8u mode_kon_operator[4]; /* bits 4-7 of register 0x28 */
+    Bit8u mode_kon[24];
+    Bit8u mode_csm; /* 1 when mode_ch3 == 2 */
+    Bit8u mode_kon_csm;
+    Bit8u dacen; /* bit 7 of register 0x2b */
+    Bit16s dacdata; /* 9 bits: bits 8-1 from register 0x2a (xor 0x80), bit 0 from bit 3 of register 0x2c */
+
+    Bit8u ks[24];
+    Bit8u ar[24];
+    Bit8u sr[24];
+    Bit8u dt[24];
+    Bit8u multi[24]; /* register field doubled; a field of 0 is stored as 1 */
+    Bit8u sl[24]; /* 4-bit register field; bit 4 is also set when the field is 15 */
+    Bit8u rr[24];
+    Bit8u dr[24];
+    Bit8u am[24];
+    Bit8u tl[24];
+    Bit8u ssg_eg[24];
+
+    Bit16u fnum[6];
+    Bit8u block[6];
+    Bit8u kcode[6];
+    Bit16u fnum_3ch[6];
+    Bit8u block_3ch[6];
+    Bit8u kcode_3ch[6];
+    Bit8u reg_a4; /* last value written to 0xa4; applied when 0xa0 is written */
+    Bit8u reg_ac; /* last value written to 0xac; applied when 0xa8 is written */
+    Bit8u connect[6];
+    Bit8u fb[6];
+    Bit8u pan_l[6], pan_r[6];
+    Bit8u ams[6];
+    Bit8u pms[6];
+
+    /*EXTRA*/
+    Bit32u mute[7];
+    Bit32s rateratio;
+    Bit32s samplecnt;
+    Bit32s oldsamples[2];
+    Bit32s samples[2];
+
+    Bit64u writebuf_samplecnt;
+    Bit32u writebuf_cur;
+    Bit32u writebuf_last;
+    Bit64u writebuf_lasttime;
+    opn2_writebuf writebuf[OPN_WRITEBUF_SIZE];
+} ym3438_t;
+
+/* EXTRA, original was "void OPN2_Reset(ym3438_t *chip)" */
+void OPN2_Reset(ym3438_t *chip, Bit32u rate, Bit32u clock);
+void OPN2_SetChipType(Bit32u type);
+void OPN2_Clock(ym3438_t *chip, Bit16s *buffer);
+void OPN2_Write(ym3438_t *chip, Bit32u port, Bit8u data);
+void OPN2_SetTestPin(ym3438_t *chip, Bit32u value);
+Bit32u OPN2_ReadTestPin(ym3438_t *chip);
+Bit32u OPN2_ReadIRQPin(ym3438_t *chip);
+Bit8u OPN2_Read(ym3438_t *chip, Bit32u port);
+
+/*EXTRA*/
+void OPN2_WriteBuffered(ym3438_t *chip, Bit32u port, Bit8u data);
+void OPN2_Generate(ym3438_t *chip, Bit16s *buf);
+void OPN2_GenerateResampled(ym3438_t *chip, Bit16s *buf);
+void OPN2_GenerateStream(ym3438_t *chip, Bit16s *output, Bit32u numsamples);
+void OPN2_GenerateStreamMix(ym3438_t *chip, Bit16s *output, Bit32u numsamples);
+void OPN2_SetOptions(Bit8u flags);
+void OPN2_SetMute(ym3438_t *chip, Bit32u mute);
+
+
+
+
+
+enum { /* envelope generator states, stored in ym3438_t::eg_state[] */
+    eg_num_attack = 0,
+    eg_num_decay = 1,
+    eg_num_sustain = 2,
+    eg_num_release = 3
+};
+
+/* logsin table */
+static const Bit16u logsinrom[256] = {
+    0x859, 0x6c3, 0x607, 0x58b, 0x52e, 0x4e4, 0x4a6, 0x471,
+    0x443, 0x41a, 0x3f5, 0x3d3, 0x3b5, 0x398, 0x37e, 0x365,
+    0x34e, 0x339, 0x324, 0x311, 0x2ff, 0x2ed, 0x2dc, 0x2cd,
+    0x2bd, 0x2af, 0x2a0, 0x293, 0x286, 0x279, 0x26d, 0x261,
+    0x256, 0x24b, 0x240, 0x236, 0x22c, 0x222, 0x218, 0x20f,
+    0x206, 0x1fd, 0x1f5, 0x1ec, 0x1e4, 0x1dc, 0x1d4, 0x1cd,
+    0x1c5, 0x1be, 0x1b7, 0x1b0, 0x1a9, 0x1a2, 0x19b, 0x195,
+    0x18f, 0x188, 0x182, 0x17c, 0x177, 0x171, 0x16b, 0x166,
+    0x160, 0x15b, 0x155, 0x150, 0x14b, 0x146, 0x141, 0x13c,
+    0x137, 0x133, 0x12e, 0x129, 0x125, 0x121, 0x11c, 0x118,
+    0x114, 0x10f, 0x10b, 0x107, 0x103, 0x0ff, 0x0fb, 0x0f8,
+    0x0f4, 0x0f0, 0x0ec, 0x0e9, 0x0e5, 0x0e2, 0x0de, 0x0db,
+    0x0d7, 0x0d4, 0x0d1, 0x0cd, 0x0ca, 0x0c7, 0x0c4, 0x0c1,
+    0x0be, 0x0bb, 0x0b8, 0x0b5, 0x0b2, 0x0af, 0x0ac, 0x0a9,
+    0x0a7, 0x0a4, 0x0a1, 0x09f, 0x09c, 0x099, 0x097, 0x094,
+    0x092, 0x08f, 0x08d, 0x08a, 0x088, 0x086, 0x083, 0x081,
+    0x07f, 0x07d, 0x07a, 0x078, 0x076, 0x074, 0x072, 0x070,
+    0x06e, 0x06c, 0x06a, 0x068, 0x066, 0x064, 0x062, 0x060,
+    0x05e, 0x05c, 0x05b, 0x059, 0x057, 0x055, 0x053, 0x052,
+    0x050, 0x04e, 0x04d, 0x04b, 0x04a, 0x048, 0x046, 0x045,
+    0x043, 0x042, 0x040, 0x03f, 0x03e, 0x03c, 0x03b, 0x039,
+    0x038, 0x037, 0x035, 0x034, 0x033, 0x031, 0x030, 0x02f,
+    0x02e, 0x02d, 0x02b, 0x02a, 0x029, 0x028, 0x027, 0x026,
+    0x025, 0x024, 0x023, 0x022, 0x021, 0x020, 0x01f, 0x01e,
+    0x01d, 0x01c, 0x01b, 0x01a, 0x019, 0x018, 0x017, 0x017,
+    0x016, 0x015, 0x014, 0x014, 0x013, 0x012, 0x011, 0x011,
+    0x010, 0x00f, 0x00f, 0x00e, 0x00d, 0x00d, 0x00c, 0x00c,
+    0x00b, 0x00a, 0x00a, 0x009, 0x009, 0x008, 0x008, 0x007,
+    0x007, 0x007, 0x006, 0x006, 0x005, 0x005, 0x005, 0x004,
+    0x004, 0x004, 0x003, 0x003, 0x003, 0x002, 0x002, 0x002,
+    0x002, 0x001, 0x001, 0x001, 0x001, 0x001, 0x001, 0x001,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000
+};
+
+/* exp table */
+static const Bit16u exprom[256] = {
+    0x000, 0x003, 0x006, 0x008, 0x00b, 0x00e, 0x011, 0x014,
+    0x016, 0x019, 0x01c, 0x01f, 0x022, 0x025, 0x028, 0x02a,
+    0x02d, 0x030, 0x033, 0x036, 0x039, 0x03c, 0x03f, 0x042,
+    0x045, 0x048, 0x04b, 0x04e, 0x051, 0x054, 0x057, 0x05a,
+    0x05d, 0x060, 0x063, 0x066, 0x069, 0x06c, 0x06f, 0x072,
+    0x075, 0x078, 0x07b, 0x07e, 0x082, 0x085, 0x088, 0x08b,
+    0x08e, 0x091, 0x094, 0x098, 0x09b, 0x09e, 0x0a1, 0x0a4,
+    0x0a8, 0x0ab, 0x0ae, 0x0b1, 0x0b5, 0x0b8, 0x0bb, 0x0be,
+    0x0c2, 0x0c5, 0x0c8, 0x0cc, 0x0cf, 0x0d2, 0x0d6, 0x0d9,
+    0x0dc, 0x0e0, 0x0e3, 0x0e7, 0x0ea, 0x0ed, 0x0f1, 0x0f4,
+    0x0f8, 0x0fb, 0x0ff, 0x102, 0x106, 0x109, 0x10c, 0x110,
+    0x114, 0x117, 0x11b, 0x11e, 0x122, 0x125, 0x129, 0x12c,
+    0x130, 0x134, 0x137, 0x13b, 0x13e, 0x142, 0x146, 0x149,
+    0x14d, 0x151, 0x154, 0x158, 0x15c, 0x160, 0x163, 0x167,
+    0x16b, 0x16f, 0x172, 0x176, 0x17a, 0x17e, 0x181, 0x185,
+    0x189, 0x18d, 0x191, 0x195, 0x199, 0x19c, 0x1a0, 0x1a4,
+    0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc, 0x1c0, 0x1c4,
+    0x1c8, 0x1cc, 0x1d0, 0x1d4, 0x1d8, 0x1dc, 0x1e0, 0x1e4,
+    0x1e8, 0x1ec, 0x1f0, 0x1f5, 0x1f9, 0x1fd, 0x201, 0x205,
+    0x209, 0x20e, 0x212, 0x216, 0x21a, 0x21e, 0x223, 0x227,
+    0x22b, 0x230, 0x234, 0x238, 0x23c, 0x241, 0x245, 0x249,
+    0x24e, 0x252, 0x257, 0x25b, 0x25f, 0x264, 0x268, 0x26d,
+    0x271, 0x276, 0x27a, 0x27f, 0x283, 0x288, 0x28c, 0x291,
+    0x295, 0x29a, 0x29e, 0x2a3, 0x2a8, 0x2ac, 0x2b1, 0x2b5,
+    0x2ba, 0x2bf, 0x2c4, 0x2c8, 0x2cd, 0x2d2, 0x2d6, 0x2db,
+    0x2e0, 0x2e5, 0x2e9, 0x2ee, 0x2f3, 0x2f8, 0x2fd, 0x302,
+    0x306, 0x30b, 0x310, 0x315, 0x31a, 0x31f, 0x324, 0x329,
+    0x32e, 0x333, 0x338, 0x33d, 0x342, 0x347, 0x34c, 0x351,
+    0x356, 0x35b, 0x360, 0x365, 0x36a, 0x370, 0x375, 0x37a,
+    0x37f, 0x384, 0x38a, 0x38f, 0x394, 0x399, 0x39f, 0x3a4,
+    0x3a9, 0x3ae, 0x3b4, 0x3b9, 0x3bf, 0x3c4, 0x3c9, 0x3cf,
+    0x3d4, 0x3da, 0x3df, 0x3e4, 0x3ea, 0x3ef, 0x3f5, 0x3fa
+};
+
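+/* Note table: indexed by the top 4 bits of the 11-bit F-number (fnum >> 7); the result forms the low 2 bits of the key code */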
+static const Bit32u fn_note[16] = {
+    0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 3
+};
+
+/* Envelope generator */
+static const Bit32u eg_stephi[4][4] = {
+    { 0, 0, 0, 0 },
+    { 1, 0, 0, 0 },
+    { 1, 0, 1, 0 },
+    { 1, 1, 1, 0 }
+};
+
+static const Bit8u eg_am_shift[4] = {
+    7, 3, 1, 0
+};
+
+/* Phase generator */
+static const Bit32u pg_detune[8] = { 16, 17, 19, 20, 22, 24, 27, 29 };
+
+static const Bit32u pg_lfo_sh1[8][8] = {
+    { 7, 7, 7, 7, 7, 7, 7, 7 },
+    { 7, 7, 7, 7, 7, 7, 7, 7 },
+    { 7, 7, 7, 7, 7, 7, 1, 1 },
+    { 7, 7, 7, 7, 1, 1, 1, 1 },
+    { 7, 7, 7, 1, 1, 1, 1, 0 },
+    { 7, 7, 1, 1, 0, 0, 0, 0 },
+    { 7, 7, 1, 1, 0, 0, 0, 0 },
+    { 7, 7, 1, 1, 0, 0, 0, 0 }
+};
+
+static const Bit32u pg_lfo_sh2[8][8] = {
+    { 7, 7, 7, 7, 7, 7, 7, 7 },
+    { 7, 7, 7, 7, 2, 2, 2, 2 },
+    { 7, 7, 7, 2, 2, 2, 7, 7 },
+    { 7, 7, 2, 2, 7, 7, 2, 2 },
+    { 7, 7, 2, 7, 7, 7, 2, 7 },
+    { 7, 7, 7, 2, 7, 7, 2, 1 },
+    { 7, 7, 7, 2, 7, 7, 2, 1 },
+    { 7, 7, 7, 2, 7, 7, 2, 1 }
+};
+
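+/* Address decoder: per-slot and per-channel patterns (bank bit 0x100 plus low address bits) compared against the latched register address */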
+static const Bit32u op_offset[12] = {
+    0x000, /* Ch1 OP1/OP2 */
+    0x001, /* Ch2 OP1/OP2 */
+    0x002, /* Ch3 OP1/OP2 */
+    0x100, /* Ch4 OP1/OP2 */
+    0x101, /* Ch5 OP1/OP2 */
+    0x102, /* Ch6 OP1/OP2 */
+    0x004, /* Ch1 OP3/OP4 */
+    0x005, /* Ch2 OP3/OP4 */
+    0x006, /* Ch3 OP3/OP4 */
+    0x104, /* Ch4 OP3/OP4 */
+    0x105, /* Ch5 OP3/OP4 */
+    0x106  /* Ch6 OP3/OP4 */
+};
+
+static const Bit32u ch_offset[6] = {
+    0x000, /* Ch1 */
+    0x001, /* Ch2 */
+    0x002, /* Ch3 */
+    0x100, /* Ch4 */
+    0x101, /* Ch5 */
+    0x102  /* Ch6 */
+};
+
+/* LFO */
+static const Bit32u lfo_cycles[8] = {
+    108, 77, 71, 67, 62, 44, 8, 5
+};
+
+/* FM algorithm */
+static const Bit32u fm_algorithm[4][6][8] = {
+    {
+        { 1, 1, 1, 1, 1, 1, 1, 1 }, /* OP1_0         */
+        { 1, 1, 1, 1, 1, 1, 1, 1 }, /* OP1_1         */
+        { 0, 0, 0, 0, 0, 0, 0, 0 }, /* OP2           */
+        { 0, 0, 0, 0, 0, 0, 0, 0 }, /* Last operator */
+        { 0, 0, 0, 0, 0, 0, 0, 0 }, /* Last operator */
+        { 0, 0, 0, 0, 0, 0, 0, 1 }  /* Out           */
+    },
+    {
+        { 0, 1, 0, 0, 0, 1, 0, 0 }, /* OP1_0         */
+        { 0, 0, 0, 0, 0, 0, 0, 0 }, /* OP1_1         */
+        { 1, 1, 1, 0, 0, 0, 0, 0 }, /* OP2           */
+        { 0, 0, 0, 0, 0, 0, 0, 0 }, /* Last operator */
+        { 0, 0, 0, 0, 0, 0, 0, 0 }, /* Last operator */
+        { 0, 0, 0, 0, 0, 1, 1, 1 }  /* Out           */
+    },
+    {
+        { 0, 0, 0, 0, 0, 0, 0, 0 }, /* OP1_0         */
+        { 0, 0, 0, 0, 0, 0, 0, 0 }, /* OP1_1         */
+        { 0, 0, 0, 0, 0, 0, 0, 0 }, /* OP2           */
+        { 1, 0, 0, 1, 1, 1, 1, 0 }, /* Last operator */
+        { 0, 0, 0, 0, 0, 0, 0, 0 }, /* Last operator */
+        { 0, 0, 0, 0, 1, 1, 1, 1 }  /* Out           */
+    },
+    {
+        { 0, 0, 1, 0, 0, 1, 0, 0 }, /* OP1_0         */
+        { 0, 0, 0, 0, 0, 0, 0, 0 }, /* OP1_1         */
+        { 0, 0, 0, 1, 0, 0, 0, 0 }, /* OP2           */
+        { 1, 1, 0, 1, 1, 0, 0, 0 }, /* Last operator */
+        { 0, 0, 1, 0, 0, 0, 0, 0 }, /* Last operator */
+        { 1, 1, 1, 1, 1, 1, 1, 1 }  /* Out           */
+    }
+};
+
+static Bit32u chip_type = ym3438_type_discrete;
+
+void OPN2_DoIO(ym3438_t *chip)
+{
+    Bit8u counter;
+    /* A write strobe is seen when the low two bits of write_a / write_d read 01 */
+    chip->write_a_en = ((chip->write_a & 0x03) == 0x01) ? 1 : 0;
+    chip->write_d_en = ((chip->write_d & 0x03) == 0x01) ? 1 : 0;
+    chip->write_a = (Bit8u)(chip->write_a << 1);
+    chip->write_d = (Bit8u)(chip->write_d << 1);
+    /* busy mirrors the previous write_busy; a data strobe asserts write_busy and it drops once the 5-bit counter carries out */
+    chip->busy = chip->write_busy;
+    counter = (Bit8u)(chip->write_busy_cnt + chip->write_busy);
+    chip->write_busy = chip->write_d_en || (chip->write_busy && counter < 0x20);
+    chip->write_busy_cnt = counter & 0x1f;
+}
+
+void OPN2_DoRegWrite(ym3438_t *chip) /* applies a pending register write to the slot/channel fields, then decodes new address/data strobes */
+{
+    Bit32u i;
+    Bit32u slot = chip->cycles % 12; /* index into the 12-entry op_offset table */
+    Bit32u address;
+    Bit32u channel = chip->channel;
+    /* Update registers */
+    if (chip->write_fm_data)
+    {
+        /* Slot */
+        if (op_offset[slot] == (chip->address & 0x107))
+        {
+            if (chip->address & 0x08)
+            {
+                /* OP2, OP4 */
+                slot += 12;
+            }
+            address = chip->address & 0xf0;
+            switch (address)
+            {
+            case 0x30: /* DT, MULTI */
+                chip->multi[slot] = chip->data & 0x0f;
+                if (!chip->multi[slot])
+                {
+                    chip->multi[slot] = 1; /* a field of 0 is stored as 1 */
+                }
+                else
+                {
+                    chip->multi[slot] <<= 1; /* any other field value is stored doubled */
+                }
+                chip->dt[slot] = (chip->data >> 4) & 0x07;
+                break;
+            case 0x40: /* TL */
+                chip->tl[slot] = chip->data & 0x7f;
+                break;
+            case 0x50: /* KS, AR */
+                chip->ar[slot] = chip->data & 0x1f;
+                chip->ks[slot] = (chip->data >> 6) & 0x03;
+                break;
+            case 0x60: /* AM, DR */
+                chip->dr[slot] = chip->data & 0x1f;
+                chip->am[slot] = (chip->data >> 7) & 0x01;
+                break;
+            case 0x70: /* SR */
+                chip->sr[slot] = chip->data & 0x1f;
+                break;
+            case 0x80: /* SL, RR */
+                chip->rr[slot] = chip->data & 0x0f;
+                chip->sl[slot] = (chip->data >> 4) & 0x0f;
+                chip->sl[slot] |= (chip->sl[slot] + 1) & 0x10; /* a field of 15 additionally sets bit 4 */
+                break;
+            case 0x90: /* SSG-EG */
+                chip->ssg_eg[slot] = chip->data & 0x0f;
+                break;
+            default:
+                break;
+            }
+        }
+
+        /* Channel */
+        if (ch_offset[channel] == (chip->address & 0x103))
+        {
+            address = chip->address & 0xfc;
+            switch (address)
+            {
+            case 0xa0: /* F-number low byte; the high 3 bits and the block come from the reg_a4 latch */
+                chip->fnum[channel] = (chip->data & 0xff) | ((chip->reg_a4 & 0x07) << 8);
+                chip->block[channel] = (chip->reg_a4 >> 3) & 0x07;
+                chip->kcode[channel] = (chip->block[channel] << 2) | fn_note[chip->fnum[channel] >> 7];
+                break;
+            case 0xa4: /* only latched here; applied by the next 0xa0 write */
+                chip->reg_a4 = chip->data & 0xff;
+                break;
+            case 0xa8: /* same as 0xa0, for the *_3ch fields and the reg_ac latch */
+                chip->fnum_3ch[channel] = (chip->data & 0xff) | ((chip->reg_ac & 0x07) << 8);
+                chip->block_3ch[channel] = (chip->reg_ac >> 3) & 0x07;
+                chip->kcode_3ch[channel] = (chip->block_3ch[channel] << 2) | fn_note[chip->fnum_3ch[channel] >> 7];
+                break;
+            case 0xac: /* only latched here; applied by the next 0xa8 write */
+                chip->reg_ac = chip->data & 0xff;
+                break;
+            case 0xb0: /* connect = bits 2-0, fb = bits 5-3 */
+                chip->connect[channel] = chip->data & 0x07;
+                chip->fb[channel] = (chip->data >> 3) & 0x07;
+                break;
+            case 0xb4: /* pms = bits 2-0, ams = bits 5-4, pan_r = bit 6, pan_l = bit 7 */
+                chip->pms[channel] = chip->data & 0x07;
+                chip->ams[channel] = (chip->data >> 4) & 0x03;
+                chip->pan_l[channel] = (chip->data >> 7) & 0x01;
+                chip->pan_r[channel] = (chip->data >> 6) & 0x01;
+                break;
+            default:
+                break;
+            }
+        }
+    }
+
+    if (chip->write_a_en || chip->write_d_en)
+    {
+        /* Data */
+        if (chip->write_a_en)
+        {
+            chip->write_fm_data = 0; /* a new address write cancels any pending data write */
+        }
+
+        if (chip->write_fm_address && chip->write_d_en)
+        {
+            chip->write_fm_data = 1; /* data strobe after an FM address: apply it on later calls */
+        }
+
+        /* Address */
+        if (chip->write_a_en)
+        {
+            if ((chip->write_data & 0xf0) != 0x00)
+            {
+                /* FM Write */
+                chip->address = chip->write_data;
+                chip->write_fm_address = 1;
+            }
+            else
+            {
+                /* SSG write */
+                chip->write_fm_address = 0;
+            }
+        }
+
+        /* FM Mode */
+        /* Data: handled only when bit 0x100 of write_data is clear */
+        if (chip->write_d_en && (chip->write_data & 0x100) == 0)
+        {
+            switch (chip->address)
+            {
+            case 0x21: /* LSI test 1 */
+                for (i = 0; i < 8; i++)
+                {
+                    chip->mode_test_21[i] = (chip->write_data >> i) & 0x01;
+                }
+                break;
+            case 0x22: /* LFO control */
+                if ((chip->write_data >> 3) & 0x01)
+                {
+                    chip->lfo_en = 0x7f;
+                }
+                else
+                {
+                    chip->lfo_en = 0;
+                }
+                chip->lfo_freq = chip->write_data & 0x07;
+                break;
+            case 0x24: /* Timer A: bits 9-2 */
+                chip->timer_a_reg &= 0x03;
+                chip->timer_a_reg |= (chip->write_data & 0xff) << 2;
+                break;
+            case 0x25: /* Timer A: bits 1-0 */
+                chip->timer_a_reg &= 0x3fc;
+                chip->timer_a_reg |= chip->write_data & 0x03;
+                break;
+            case 0x26: /* Timer B */
+                chip->timer_b_reg = chip->write_data & 0xff;
+                break;
+            case 0x27: /* CSM, Timer control */
+                chip->mode_ch3 = (chip->write_data & 0xc0) >> 6;
+                chip->mode_csm = chip->mode_ch3 == 2;
+                chip->timer_a_load = chip->write_data & 0x01;
+                chip->timer_a_enable = (chip->write_data >> 2) & 0x01;
+                chip->timer_a_reset = (chip->write_data >> 4) & 0x01;
+                chip->timer_b_load = (chip->write_data >> 1) & 0x01;
+                chip->timer_b_enable = (chip->write_data >> 3) & 0x01;
+                chip->timer_b_reset = (chip->write_data >> 5) & 0x01;
+                break;
+            case 0x28: /* Key on/off */
+                for (i = 0; i < 4; i++)
+                {
+                    chip->mode_kon_operator[i] = (chip->write_data >> (4 + i)) & 0x01;
+                }
+                if ((chip->write_data & 0x03) == 0x03)
+                {
+                    /* Invalid address */
+                    chip->mode_kon_channel = 0xff;
+                }
+                else
+                {
+                    chip->mode_kon_channel = (chip->write_data & 0x03) + ((chip->write_data >> 2) & 1) * 3; /* low 2 bits, plus 3 when bit 2 is set */
+                }
+                break;
+            case 0x2a: /* DAC data */
+                chip->dacdata &= 0x01;
+                chip->dacdata |= (chip->write_data ^ 0x80) << 1;
+                break;
+            case 0x2b: /* DAC enable */
+                chip->dacen = chip->write_data >> 7;
+                break;
+            case 0x2c: /* LSI test 2 */
+                for (i = 0; i < 8; i++)
+                {
+                    chip->mode_test_2c[i] = (chip->write_data >> i) & 0x01;
+                }
+                chip->dacdata &= 0x1fe;
+                chip->dacdata |= chip->mode_test_2c[3]; /* test bit 3 becomes bit 0 of dacdata */
+                chip->eg_custom_timer = !chip->mode_test_2c[7] && chip->mode_test_2c[6];
+                break;
+            default:
+                break;
+            }
+        }
+
+        /* Address */
+        if (chip->write_a_en)
+        {
+            chip->write_fm_mode_a = chip->write_data & 0xff;
+        }
+    }
+
+    if (chip->write_fm_data)
+    {
+        chip->data = chip->write_data & 0xff; /* data byte consumed by the register update at the top on a later call */
+    }
+}
+
+void OPN2_PhaseCalcIncrement(ym3438_t *chip) /* computes pg_inc[cycles] from pg_fnum/pg_block/pg_kcode, the LFO phase modulation and the slot's detune and multiplier */
+{
+    Bit32u chan = chip->channel;
+    Bit32u slot = chip->cycles;
+    Bit32u fnum = chip->pg_fnum;
+    Bit32u fnum_h = fnum >> 4;
+    Bit32u fm;
+    Bit32u basefreq;
+    Bit8u lfo = chip->lfo_pm;
+    Bit8u lfo_l = lfo & 0x0f;
+    Bit8u pms = chip->pms[chan];
+    Bit8u dt = chip->dt[slot];
+    Bit8u dt_l = dt & 0x03;
+    Bit8u detune = 0;
+    Bit8u block, note;
+    Bit8u sum, sum_h, sum_l;
+    Bit8u kcode = chip->pg_kcode;
+
+    fnum <<= 1;
+    /* Apply LFO */
+    if (lfo_l & 0x08)
+    {
+        lfo_l ^= 0x0f; /* mirrors 8..15 onto 7..0 so lfo_l indexes the 8-column shift tables */
+    }
+    fm = (fnum_h >> pg_lfo_sh1[pms][lfo_l]) + (fnum_h >> pg_lfo_sh2[pms][lfo_l]);
+    if (pms > 5)
+    {
+        fm <<= pms - 5;
+    }
+    fm >>= 2;
+    if (lfo & 0x10) /* bit 4 of lfo_pm selects the sign of the modulation */
+    {
+        fnum -= fm;
+    }
+    else
+    {
+        fnum += fm;
+    }
+    fnum &= 0xfff; /* keep 12 bits */
+
+    basefreq = (fnum << chip->pg_block) >> 2;
+
+    /* Apply detune */
+    if (dt_l)
+    {
+        if (kcode > 0x1c)
+        {
+            kcode = 0x1c; /* key code is clamped before the detune lookup */
+        }
+        block = kcode >> 2;
+        note = kcode & 0x03;
+        sum = block + 9 + ((dt_l == 3) | (dt_l & 0x02)); /* adds 0, 2 or 3 for dt_l = 1, 2, 3 */
+        sum_h = sum >> 1;
+        sum_l = sum & 0x01;
+        detune = pg_detune[(sum_l << 2) | note] >> (9 - sum_h);
+    }
+    if (dt & 0x04) /* bit 2 of dt selects the sign of the detune */
+    {
+        basefreq -= detune;
+    }
+    else
+    {
+        basefreq += detune;
+    }
+    basefreq &= 0x1ffff; /* keep 17 bits */
+    chip->pg_inc[slot] = (basefreq * chip->multi[slot]) >> 1; /* multi is stored doubled, hence the >> 1 */
+    chip->pg_inc[slot] &= 0xfffff; /* keep 20 bits */
+}
+
+void OPN2_PhaseGenerate(ym3438_t *chip)
+{
+    const Bit32u mask_slot = (chip->cycles + 20) % 24;
+    const Bit32u step_slot = (chip->cycles + 19) % 24;
+
+    /* Mask increment: a pending phase reset zeroes the increment of the
+       slot 20 positions ahead of the current cycle. */
+    if (chip->pg_reset[mask_slot])
+        chip->pg_inc[mask_slot] = 0;
+
+    /* Phase step: the slot 19 positions ahead advances its 20-bit phase,
+       unless it is being reset or test bit 3 of register 0x21 is set. */
+    if (chip->pg_reset[step_slot] || chip->mode_test_21[3])
+        chip->pg_phase[step_slot] = 0;
+    else
+        chip->pg_phase[step_slot] =
+            (chip->pg_phase[step_slot] + chip->pg_inc[step_slot]) & 0xfffff;
+}
+
+void OPN2_EnvelopeSSGEG(ym3438_t *chip) /* recomputes the SSG-EG latches, direction, inversion and enable flags for slot = cycles */
+{
+    Bit32u slot = chip->cycles;
+    Bit8u direction = 0;
+    chip->eg_ssg_pgrst_latch[slot] = 0;
+    chip->eg_ssg_repeat_latch[slot] = 0;
+    chip->eg_ssg_hold_up_latch[slot] = 0;
+    chip->eg_ssg_inv[slot] = 0;
+    if (chip->ssg_eg[slot] & 0x08) /* bit 3 of the SSG-EG field: mode enabled */
+    {
+        direction = chip->eg_ssg_dir[slot];
+        if (chip->eg_level[slot] & 0x200) /* bit 9 of the envelope level is set */
+        {
+            /* Reset */
+            if ((chip->ssg_eg[slot] & 0x03) == 0x00)
+            {
+                chip->eg_ssg_pgrst_latch[slot] = 1;
+            }
+            /* Repeat */
+            if ((chip->ssg_eg[slot] & 0x01) == 0x00)
+            {
+                chip->eg_ssg_repeat_latch[slot] = 1;
+            }
+            /* Inverse */
+            if ((chip->ssg_eg[slot] & 0x03) == 0x02)
+            {
+                direction ^= 1;
+            }
+            if ((chip->ssg_eg[slot] & 0x03) == 0x03)
+            {
+                direction = 1;
+            }
+        }
+        /* Hold up */
+        if (chip->eg_kon_latch[slot]
+         && ((chip->ssg_eg[slot] & 0x07) == 0x05 || (chip->ssg_eg[slot] & 0x07) == 0x03))
+        {
+            chip->eg_ssg_hold_up_latch[slot] = 1;
+        }
+        direction &= chip->eg_kon[slot]; /* direction is forced to 0 while the key is off */
+        chip->eg_ssg_inv[slot] = (chip->eg_ssg_dir[slot] ^ ((chip->ssg_eg[slot] >> 2) & 0x01))
+                               & chip->eg_kon[slot];
+    }
+    chip->eg_ssg_dir[slot] = direction; /* stays 0 when SSG-EG is disabled */
+    chip->eg_ssg_enable[slot] = (chip->ssg_eg[slot] >> 3) & 0x01;
+}
+
+void OPN2_EnvelopeADSR(ym3438_t *chip)
+{   /* Advance one slot's envelope: detect key on/off, choose the next ADSR state and step the 10-bit level. */
+    Bit32u slot = (chip->cycles + 22) % 24;
+    /* nkon is the freshly latched key state, okon the state stored at the end of this slot's previous pass */
+    Bit8u nkon = chip->eg_kon_latch[slot];
+    Bit8u okon = chip->eg_kon[slot];
+    Bit8u kon_event;
+    Bit8u koff_event;
+    Bit8u eg_off;
+    Bit16s level;
+    Bit16s nextlevel = 0;
+    Bit16s ssg_level;
+    Bit8u nextstate = chip->eg_state[slot];
+    Bit16s inc = 0;
+    chip->eg_read[0] = chip->eg_read_inc;
+    chip->eg_read_inc = chip->eg_inc > 0;
+
+    /* Reset phase generator */
+    chip->pg_reset[slot] = (nkon && !okon) || chip->eg_ssg_pgrst_latch[slot];
+
+    /* KeyOn/Off */
+    kon_event = (nkon && !okon) || (okon && chip->eg_ssg_repeat_latch[slot]); /* an SSG-EG repeat counts as a new key-on */
+    koff_event = okon && !nkon;
+
+    ssg_level = level = (Bit16s)chip->eg_level[slot];
+
+    if (chip->eg_ssg_inv[slot])
+    {
+        /* Inverse */
+        ssg_level = 512 - level;
+        ssg_level &= 0x3ff;
+    }
+    if (koff_event)
+    {
+        level = ssg_level; /* on key-off, continue from the (possibly inverted) level */
+    }
+    if (chip->eg_ssg_enable[slot])
+    {
+        eg_off = level >> 9; /* SSG-EG: envelope counts as off once bit 9 of the level is set */
+    }
+    else
+    {
+        eg_off = (level & 0x3f0) == 0x3f0; /* normal mode: off for levels 0x3f0..0x3ff */
+    }
+    nextlevel = level;
+    if (kon_event)
+    {
+        nextstate = eg_num_attack;
+        /* Instant attack */
+        if (chip->eg_ratemax)
+        {
+            nextlevel = 0;
+        }
+        else if (chip->eg_state[slot] == eg_num_attack && level != 0 && chip->eg_inc && nkon)
+        {
+            inc = (~level << chip->eg_inc) >> 5; /* negative step whose size shrinks as level approaches 0 */
+        }
+    }
+    else
+    {
+        switch (chip->eg_state[slot])
+        {
+        case eg_num_attack:
+            if (level == 0)
+            {
+                nextstate = eg_num_decay; /* attack is complete once the level reaches 0 */
+            }
+            else if(chip->eg_inc && !chip->eg_ratemax && nkon)
+            {
+                inc = (~level << chip->eg_inc) >> 5;
+            }
+            break;
+        case eg_num_decay:
+            if ((level >> 5) == chip->eg_sl[1]) /* top five level bits compared with the delayed SL value */
+            {
+                nextstate = eg_num_sustain;
+            }
+            else if (!eg_off && chip->eg_inc)
+            {
+                inc = 1 << (chip->eg_inc - 1);
+                if (chip->eg_ssg_enable[slot])
+                {
+                    inc <<= 2; /* SSG-EG steps four times as far */
+                }
+            }
+            break;
+        case eg_num_sustain:
+        case eg_num_release:
+            if (!eg_off && chip->eg_inc)
+            {
+                inc = 1 << (chip->eg_inc - 1);
+                if (chip->eg_ssg_enable[slot])
+                {
+                    inc <<= 2;
+                }
+            }
+            break;
+        default:
+            break;
+        }
+        if (!nkon)
+        {
+            nextstate = eg_num_release; /* a released key always forces the release state */
+        }
+    }
+    if (chip->eg_kon_csm[slot])
+    {
+        nextlevel |= chip->eg_tl[1] << 3; /* CSM key-on ORs the delayed TL value into the level */
+    }
+
+    /* Envelope off */
+    if (!kon_event && !chip->eg_ssg_hold_up_latch[slot] && chip->eg_state[slot] != eg_num_attack && eg_off)
+    {
+        nextstate = eg_num_release;
+        nextlevel = 0x3ff;
+    }
+
+    nextlevel += inc;
+    /* Commit: the latched key state becomes the stored one and the level is kept to 10 bits */
+    chip->eg_kon[slot] = chip->eg_kon_latch[slot];
+    chip->eg_level[slot] = (Bit16u)nextlevel & 0x3ff;
+    chip->eg_state[slot] = nextstate;
+}
+
+void OPN2_EnvelopePrepare(ym3438_t *chip)
+{   /* Turn the previously selected rate into an envelope increment, then select rate, key scale, AM, TL and SL for the current slot. */
+    Bit8u rate;
+    Bit8u sum;
+    Bit8u inc = 0;
+    Bit32u slot = chip->cycles;
+    Bit8u rate_sel;
+
+    /* Prepare increment */
+    rate = (chip->eg_rate << 1) + chip->eg_ksv;
+    /* The combined rate saturates at 0x3f */
+    if (rate > 0x3f)
+    {
+        rate = 0x3f;
+    }
+
+    sum = ((rate >> 2) + chip->eg_shift_lock) & 0x0f;
+    if (chip->eg_rate != 0 && chip->eg_quotient == 2) /* an increment is only produced while eg_quotient is 2 */
+    {
+        if (rate < 48)
+        {
+            switch (sum) /* low rates: step by at most 1, gated by the locked timer shift */
+            {
+            case 12:
+                inc = 1;
+                break;
+            case 13:
+                inc = (rate >> 1) & 0x01;
+                break;
+            case 14:
+                inc = rate & 0x01;
+                break;
+            default:
+                break;
+            }
+        }
+        else
+        {
+            inc = eg_stephi[rate & 0x03][chip->eg_timer_low_lock] + (rate >> 2) - 11; /* high rates: table step plus a rate offset */
+            if (inc > 4)
+            {
+                inc = 4;
+            }
+        }
+    }
+    chip->eg_inc = inc;
+    chip->eg_ratemax = (rate >> 1) == 0x1f; /* true for rates 62 and 63 */
+
+    /* Prepare rate & ksv */
+    rate_sel = chip->eg_state[slot];
+    if ((chip->eg_kon[slot] && chip->eg_ssg_repeat_latch[slot])
+     || (!chip->eg_kon[slot] && chip->eg_kon_latch[slot])) /* a pending key-on or SSG-EG repeat uses the attack rate */
+    {
+        rate_sel = eg_num_attack;
+    }
+    switch (rate_sel)
+    {
+    case eg_num_attack:
+        chip->eg_rate = chip->ar[slot];
+        break;
+    case eg_num_decay:
+        chip->eg_rate = chip->dr[slot];
+        break;
+    case eg_num_sustain:
+        chip->eg_rate = chip->sr[slot];
+        break;
+    case eg_num_release:
+        chip->eg_rate = (chip->rr[slot] << 1) | 0x01; /* rr is widened by one bit with the low bit forced to 1 */
+        break;
+    default:
+        break;
+    }
+    chip->eg_ksv = chip->pg_kcode >> (chip->ks[slot] ^ 0x03); /* a larger ks value shifts the key code less */
+    if (chip->am[slot])
+    {
+        chip->eg_lfo_am = chip->lfo_am >> eg_am_shift[chip->ams[chip->channel]];
+    }
+    else
+    {
+        chip->eg_lfo_am = 0;
+    }
+    /* Delay TL & SL value */
+    chip->eg_tl[1] = chip->eg_tl[0];
+    chip->eg_tl[0] = chip->tl[slot];
+    chip->eg_sl[1] = chip->eg_sl[0];
+    chip->eg_sl[0] = chip->sl[slot];
+}
+
+void OPN2_EnvelopeGenerate(ym3438_t *chip)
+{   /* Build the slot's final envelope output: internal level, optional SSG-EG inversion, AM LFO and TL, clamped to 0x3ff. */
+    Bit32u slot = (chip->cycles + 23) % 24;
+    Bit16u level;
+
+    level = chip->eg_level[slot];
+
+    if (chip->eg_ssg_inv[slot])
+    {
+        /* Inverse */
+        level = 512 - level;
+    }
+    if (chip->mode_test_21[5])
+    {
+        level = 0; /* test bit forces the level to 0 */
+    }
+    level &= 0x3ff;
+
+    /* Apply AM LFO */
+    level += chip->eg_lfo_am;
+
+    /* Apply TL */
+    if (!(chip->mode_csm && chip->channel == 2 + 1)) /* TL is skipped while CSM mode is on and chip->channel is 3 */
+    {
+        level += chip->eg_tl[0] << 3;
+    }
+    if (level > 0x3ff)
+    {
+        level = 0x3ff;
+    }
+    chip->eg_out[slot] = level;
+}
+
+void OPN2_UpdateLFO(ym3438_t *chip)
+{   /* Step the LFO prescaler; when it matches the mask selected by lfo_freq, clear it and advance lfo_cnt. */
+    if ((chip->lfo_quotient & lfo_cycles[chip->lfo_freq]) == lfo_cycles[chip->lfo_freq])
+    {
+        chip->lfo_quotient = 0;
+        chip->lfo_cnt++;
+    }
+    else
+    {
+        chip->lfo_quotient += chip->lfo_inc; /* lfo_inc is set per clock by OPN2_Clock */
+    }
+    chip->lfo_cnt &= chip->lfo_en; /* lfo_en acts as a bit mask on the counter */
+}
+
+void OPN2_FMPrepare(ym3438_t *chip)
+{   /* Compute the modulation input for slot (cycles + 6) % 24 and record operator output history from slot (cycles + 18) % 24. */
+    Bit32u slot = (chip->cycles + 6) % 24;
+    Bit32u channel = chip->channel;
+    Bit16s mod, mod1, mod2;
+    Bit32u op = slot / 6;
+    Bit8u connect = chip->connect[channel];
+    Bit32u prevslot = (chip->cycles + 18) % 24;
+
+    /* Calculate modulation */
+    mod1 = mod2 = 0;
+    /* Rows 0-4 of fm_algorithm select which stored outputs feed this operator for the channel's connect value */
+    if (fm_algorithm[op][0][connect])
+    {
+        mod2 |= chip->fm_op1[channel][0];
+    }
+    if (fm_algorithm[op][1][connect])
+    {
+        mod1 |= chip->fm_op1[channel][1];
+    }
+    if (fm_algorithm[op][2][connect])
+    {
+        mod1 |= chip->fm_op2[channel];
+    }
+    if (fm_algorithm[op][3][connect])
+    {
+        mod2 |= chip->fm_out[prevslot];
+    }
+    if (fm_algorithm[op][4][connect])
+    {
+        mod1 |= chip->fm_out[prevslot];
+    }
+    mod = mod1 + mod2;
+    if (op == 0)
+    {
+        /* Feedback */
+        mod = mod >> (10 - chip->fb[channel]);
+        if (!chip->fb[channel])
+        {
+            mod = 0; /* fb == 0 disables feedback entirely */
+        }
+    }
+    else
+    {
+        mod >>= 1;
+    }
+    chip->fm_mod[slot] = mod;
+    /* From here on slot refers to the same index as prevslot */
+    slot = (chip->cycles + 18) % 24;
+    /* OP1 */
+    if (slot / 6 == 0)
+    {
+        chip->fm_op1[channel][1] = chip->fm_op1[channel][0];
+        chip->fm_op1[channel][0] = chip->fm_out[slot];
+    }
+    /* OP2 */
+    if (slot / 6 == 2)
+    {
+        chip->fm_op2[channel] = chip->fm_out[slot];
+    }
+}
+
+void OPN2_ChGenerate(ym3438_t *chip)
+{   /* Add the current operator's output to its channel accumulator and publish the finished channel sum. */
+    Bit32u slot = (chip->cycles + 18) % 24;
+    Bit32u channel = chip->channel;
+    Bit32u op = slot / 6;
+    Bit32u test_dac = chip->mode_test_2c[5];
+    Bit16s acc = chip->ch_acc[channel];
+    Bit16s add = test_dac;
+    Bit16s sum = 0;
+    if (op == 0 && !test_dac)
+    {
+        acc = 0; /* operator index 0 starts a fresh sum */
+    }
+    if (fm_algorithm[op][5][chip->connect[channel]] && !test_dac)
+    {
+        add += chip->fm_out[slot] >> 5; /* row 5 of fm_algorithm marks operators that reach the channel output */
+    }
+    sum = acc + add;
+    /* Clamp */
+    if (sum > 255)
+    {
+        sum = 255;
+    }
+    else if(sum < -256)
+    {
+        sum = -256;
+    }
+    /* The previous accumulator value is published before it is overwritten below */
+    if (op == 0 || test_dac)
+    {
+        chip->ch_out[channel] = chip->ch_acc[channel];
+    }
+    chip->ch_acc[channel] = sum;
+}
+
+void OPN2_ChOutput(ym3438_t *chip)
+{   /* Drive the left/right outputs (mol/mor) for this clock from the locked channel value or the DAC data. */
+    Bit32u cycles = chip->cycles;
+    Bit32u slot = chip->cycles;
+    Bit32u channel = chip->channel;
+    Bit32u test_dac = chip->mode_test_2c[5];
+    Bit16s out;
+    Bit16s sign;
+    Bit32u out_en;
+    chip->ch_read = chip->ch_lock;
+    if (slot < 12)
+    {
+        /* Ch 4,5,6 */
+        channel++;
+    }
+    if ((cycles & 3) == 0) /* value and panning are re-locked every fourth cycle */
+    {
+        if (!test_dac)
+        {
+            /* Lock value */
+            chip->ch_lock = chip->ch_out[channel];
+        }
+        chip->ch_lock_l = chip->pan_l[channel];
+        chip->ch_lock_r = chip->pan_r[channel];
+    }
+    /* Ch 6 */
+    if (((cycles >> 2) == 1 && chip->dacen) || test_dac)
+    {
+        out = (Bit16s)chip->dacdata;
+        out <<= 7; /* shift up then down by 7: sign-extends the low 9 bits (relies on arithmetic right shift) */
+        out >>= 7;
+    }
+    else
+    {
+        out = chip->ch_lock;
+    }
+    chip->mol = 0;
+    chip->mor = 0;
+
+    if (chip_type == ym3438_type_ym2612)
+    {
+        out_en = ((cycles & 3) == 3) || test_dac;
+        /* YM2612 DAC emulation(not verified) */
+        sign = out >> 8;
+        if (out >= 0)
+        {
+            out++;
+            sign++;
+        }
+        if (chip->ch_lock_l && out_en)
+        {
+            chip->mol = out;
+        }
+        else
+        {
+            chip->mol = sign; /* when the sample is not output, only the sign-derived offset is */
+        }
+        if (chip->ch_lock_r && out_en)
+        {
+            chip->mor = out;
+        }
+        else
+        {
+            chip->mor = sign;
+        }
+        /* Amplify signal */
+        chip->mol *= 3;
+        chip->mor *= 3;
+    }
+    else
+    {
+        out_en = ((cycles & 3) != 0) || test_dac;
+        /* The discrete YM3438 seems to have the ladder effect too */
+        if (out >= 0 && chip_type == ym3438_type_discrete)
+        {
+            out++;
+        }
+        if (chip->ch_lock_l && out_en)
+        {
+            chip->mol = out;
+        }
+        if (chip->ch_lock_r && out_en)
+        {
+            chip->mor = out;
+        }
+    }
+}
+
+void OPN2_FMGenerate(ym3438_t *chip)
+{   /* Produce one operator output sample: modulated phase -> log-sine lookup -> add envelope -> exponent lookup -> sign. */
+    Bit32u slot = (chip->cycles + 19) % 24;
+    /* Calculate phase */
+    Bit16u phase = (chip->fm_mod[slot] + (chip->pg_phase[slot] >> 10)) & 0x3ff;
+    Bit16u quarter;
+    Bit16u level;
+    Bit16s output;
+    if (phase & 0x100) /* phase bit 8 set: the 8-bit table index runs backwards */
+    {
+        quarter = (phase ^ 0xff) & 0xff;
+    }
+    else
+    {
+        quarter = phase & 0xff;
+    }
+    level = logsinrom[quarter];
+    /* Apply envelope */
+    level += chip->eg_out[slot] << 2;
+    /* Transform */
+    if (level > 0x1fff)
+    {
+        level = 0x1fff;
+    }
+    output = ((exprom[(level & 0xff) ^ 0xff] | 0x400) << 2) >> (level >> 8); /* low byte indexes exprom, high bits are the shift count */
+    if (phase & 0x200) /* phase bit 9 set: negate the sample */
+    {
+        output = ((~output) ^ (chip->mode_test_21[4] << 13)) + 1;
+    }
+    else
+    {
+        output = output ^ (chip->mode_test_21[4] << 13);
+    }
+    output <<= 2; /* shift up then down by 2: keeps 14 bits, sign-extended (relies on arithmetic right shift) */
+    output >>= 2;
+    chip->fm_out[slot] = output;
+}
+
+void OPN2_DoTimerA(ym3438_t *chip)
+{   /* Run one clock of timer A: a 10-bit counter that reloads from timer_a_reg and can raise the CSM key-on. */
+    Bit16u time;
+    Bit8u load;
+    load = chip->timer_a_overflow;
+    if (chip->cycles == 2)
+    {
+        /* Lock load value */
+        load |= (!chip->timer_a_load_lock && chip->timer_a_load); /* rising edge of the load bit also triggers a reload */
+        chip->timer_a_load_lock = chip->timer_a_load;
+        if (chip->mode_csm)
+        {
+            /* CSM KeyOn */
+            chip->mode_kon_csm = load;
+        }
+        else
+        {
+            chip->mode_kon_csm = 0;
+        }
+    }
+    /* Load counter */
+    if (chip->timer_a_load_latch)
+    {
+        time = chip->timer_a_reg;
+    }
+    else
+    {
+        time = chip->timer_a_cnt;
+    }
+    chip->timer_a_load_latch = load;
+    /* Increase counter */
+    if ((chip->cycles == 1 && chip->timer_a_load_lock) || chip->mode_test_21[2]) /* once per 24 cycles, or every clock when the test bit is set */
+    {
+        time++;
+    }
+    /* Set overflow flag */
+    if (chip->timer_a_reset)
+    {
+        chip->timer_a_reset = 0;
+        chip->timer_a_overflow_flag = 0;
+    }
+    else
+    {
+        chip->timer_a_overflow_flag |= chip->timer_a_overflow & chip->timer_a_enable;
+    }
+    chip->timer_a_overflow = (time >> 10); /* a carry out of bit 9 is the overflow */
+    chip->timer_a_cnt = time & 0x3ff;
+}
+
+void OPN2_DoTimerB(ym3438_t *chip)
+{   /* Run one clock of timer B: an 8-bit counter stepped through a 4-bit sub-counter, reloading from timer_b_reg. */
+    Bit16u time;
+    Bit8u load;
+    load = chip->timer_b_overflow;
+    if (chip->cycles == 2)
+    {
+        /* Lock load value */
+        load |= (!chip->timer_b_load_lock && chip->timer_b_load);
+        chip->timer_b_load_lock = chip->timer_b_load;
+    }
+    /* Load counter */
+    if (chip->timer_b_load_latch)
+    {
+        time = chip->timer_b_reg;
+    }
+    else
+    {
+        time = chip->timer_b_cnt;
+    }
+    chip->timer_b_load_latch = load;
+    /* Increase counter */
+    if (chip->cycles == 1)
+    {
+        chip->timer_b_subcnt++;
+    }
+    if ((chip->timer_b_subcnt == 0x10 && chip->timer_b_load_lock) || chip->mode_test_21[2]) /* the main counter steps when the sub-counter reaches 16 */
+    {
+        time++;
+    }
+    chip->timer_b_subcnt &= 0x0f;
+    /* Set overflow flag */
+    if (chip->timer_b_reset)
+    {
+        chip->timer_b_reset = 0;
+        chip->timer_b_overflow_flag = 0;
+    }
+    else
+    {
+        chip->timer_b_overflow_flag |= chip->timer_b_overflow & chip->timer_b_enable;
+    }
+    chip->timer_b_overflow = (time >> 8); /* a carry out of bit 7 is the overflow */
+    chip->timer_b_cnt = time & 0xff;
+}
+
+void OPN2_KeyOn(ym3438_t*chip)
+{   /* Latch the key state for the current slot and, on the addressed cycle, store the four operator key bits written by the host. */
+    Bit32u slot = chip->cycles;
+    Bit32u chan = chip->channel;
+    /* Key On */
+    chip->eg_kon_latch[slot] = chip->mode_kon[slot];
+    chip->eg_kon_csm[slot] = 0;
+    if (chip->channel == 2 && chip->mode_kon_csm)
+    {
+        /* CSM Key On */
+        chip->eg_kon_latch[slot] = 1;
+        chip->eg_kon_csm[slot] = 1;
+    }
+    if (chip->cycles == chip->mode_kon_channel)
+    {
+        /* OP1 */
+        chip->mode_kon[chan] = chip->mode_kon_operator[0];
+        /* OP2 */
+        chip->mode_kon[chan + 12] = chip->mode_kon_operator[1]; /* slot offsets 12 and 6 are swapped relative to operator order */
+        /* OP3 */
+        chip->mode_kon[chan + 6] = chip->mode_kon_operator[2];
+        /* OP4 */
+        chip->mode_kon[chan + 18] = chip->mode_kon_operator[3];
+    }
+}
+
+void OPN2_Reset(ym3438_t *chip, Bit32u rate, Bit32u clock)
+{   /* Clear the whole chip state to power-up values and (re)compute the resampling ratio from rate and clock. */
+    Bit32u i, rateratio;
+    rateratio = (Bit32u)chip->rateratio; /* saved because the memset below wipes it */
+    memset(chip, 0, sizeof(ym3438_t));
+    for (i = 0; i < 24; i++)
+    {
+        chip->eg_out[i] = 0x3ff;
+        chip->eg_level[i] = 0x3ff;
+        chip->eg_state[i] = eg_num_release;
+        chip->multi[i] = 1;
+    }
+    for (i = 0; i < 6; i++)
+    {
+        chip->pan_l[i] = 1;
+        chip->pan_r[i] = 1;
+    }
+    /* A zero rate or a zero clock keeps the previous ratio; a zero clock would otherwise divide by zero */
+    if (rate != 0 && clock != 0)
+    {
+        chip->rateratio = (Bit32s)(Bit32u)((((Bit64u)144 * rate) << RSM_FRAC) / clock);
+    }
+    else
+    {
+        chip->rateratio = (Bit32s)rateratio;
+    }
+}
+
+void OPN2_SetChipType(Bit32u type)
+{   /* Stores the chip variant in the shared chip_type variable; it is not per-chip state, so it affects every instance. */
+    chip_type = type;
+}
+
+void OPN2_Clock(ym3438_t *chip, Bit16s *buffer)
+{   /* Advance the chip by one clock (one of 24 cycles) and write the current left/right output to buffer[0]/buffer[1]. */
+    Bit32u slot = chip->cycles;
+    chip->lfo_inc = chip->mode_test_21[1];
+    chip->pg_read >>= 1;
+    chip->eg_read[1] >>= 1;
+    chip->eg_cycle++;
+    /* Lock envelope generator timer value */
+    if (chip->cycles == 1 && chip->eg_quotient == 2)
+    {
+        if (chip->eg_cycle_stop)
+        {
+            chip->eg_shift_lock = 0;
+        }
+        else
+        {
+            chip->eg_shift_lock = chip->eg_shift + 1;
+        }
+        chip->eg_timer_low_lock = chip->eg_timer & 0x03;
+    }
+    /* Cycle specific functions */
+    switch (chip->cycles)
+    {
+    case 0:
+        chip->lfo_pm = chip->lfo_cnt >> 2;
+        if (chip->lfo_cnt & 0x40) /* bit 6 of the counter picks the rising or falling half of the AM value */
+        {
+            chip->lfo_am = chip->lfo_cnt & 0x3f;
+        }
+        else
+        {
+            chip->lfo_am = chip->lfo_cnt ^ 0x3f;
+        }
+        chip->lfo_am <<= 1;
+        break;
+    case 1:
+        chip->eg_quotient++;
+        chip->eg_quotient %= 3; /* eg_quotient cycles through 0, 1, 2 */
+        chip->eg_cycle = 0;
+        chip->eg_cycle_stop = 1;
+        chip->eg_shift = 0;
+        chip->eg_timer_inc |= chip->eg_quotient >> 1;
+        chip->eg_timer = chip->eg_timer + chip->eg_timer_inc;
+        chip->eg_timer_inc = chip->eg_timer >> 12; /* carry out of the 12-bit timer feeds the next increment */
+        chip->eg_timer &= 0xfff;
+        break;
+    case 2:
+        chip->pg_read = chip->pg_phase[21] & 0x3ff;
+        chip->eg_read[1] = chip->eg_out[0];
+        break;
+    case 13:
+        chip->eg_cycle = 0;
+        chip->eg_cycle_stop = 1;
+        chip->eg_shift = 0;
+        chip->eg_timer = chip->eg_timer + chip->eg_timer_inc;
+        chip->eg_timer_inc = chip->eg_timer >> 12;
+        chip->eg_timer &= 0xfff;
+        break;
+    case 23:
+        chip->lfo_inc |= 1;
+        break;
+    }
+    chip->eg_timer &= ~(chip->mode_test_21[5] << chip->eg_cycle);
+    if (((chip->eg_timer >> chip->eg_cycle) | (chip->pin_test_in & chip->eg_custom_timer)) & chip->eg_cycle_stop)
+    {
+        chip->eg_shift = chip->eg_cycle; /* remember the first eg_cycle at which the tested bit is set */
+        chip->eg_cycle_stop = 0;
+    }
+    /* Sub-unit updates follow; their relative order is significant because they share latched state */
+    OPN2_DoIO(chip);
+
+    OPN2_DoTimerA(chip);
+    OPN2_DoTimerB(chip);
+    OPN2_KeyOn(chip);
+
+    OPN2_ChOutput(chip);
+    OPN2_ChGenerate(chip);
+
+    OPN2_FMPrepare(chip);
+    OPN2_FMGenerate(chip);
+
+    OPN2_PhaseGenerate(chip);
+    OPN2_PhaseCalcIncrement(chip);
+
+    OPN2_EnvelopeADSR(chip);
+    OPN2_EnvelopeGenerate(chip);
+    OPN2_EnvelopeSSGEG(chip);
+    OPN2_EnvelopePrepare(chip);
+
+    /* Prepare fnum & block */
+    if (chip->mode_ch3)
+    {
+        /* Channel 3 special mode */
+        switch (slot)
+        {
+        case 1: /* OP1 */
+            chip->pg_fnum = chip->fnum_3ch[1];
+            chip->pg_block = chip->block_3ch[1];
+            chip->pg_kcode = chip->kcode_3ch[1];
+            break;
+        case 7: /* OP3 */
+            chip->pg_fnum = chip->fnum_3ch[0];
+            chip->pg_block = chip->block_3ch[0];
+            chip->pg_kcode = chip->kcode_3ch[0];
+            break;
+        case 13: /* OP2 */
+            chip->pg_fnum = chip->fnum_3ch[2];
+            chip->pg_block = chip->block_3ch[2];
+            chip->pg_kcode = chip->kcode_3ch[2];
+            break;
+        case 19: /* OP4 */
+        default:
+            chip->pg_fnum = chip->fnum[(chip->channel + 1) % 6];
+            chip->pg_block = chip->block[(chip->channel + 1) % 6];
+            chip->pg_kcode = chip->kcode[(chip->channel + 1) % 6];
+            break;
+        }
+    }
+    else
+    {
+        chip->pg_fnum = chip->fnum[(chip->channel + 1) % 6];
+        chip->pg_block = chip->block[(chip->channel + 1) % 6];
+        chip->pg_kcode = chip->kcode[(chip->channel + 1) % 6];
+    }
+
+    OPN2_UpdateLFO(chip);
+    OPN2_DoRegWrite(chip);
+    chip->cycles = (chip->cycles + 1) % 24;
+    chip->channel = chip->cycles % 6;
+    /* Outputs were set by OPN2_ChOutput earlier in this clock */
+    buffer[0] = chip->mol;
+    buffer[1] = chip->mor;
+}
+
+void OPN2_Write(ym3438_t *chip, Bit32u port, Bit8u data)
+{
+    const Bit32u high_port = (port & 2) != 0;   /* port 2 or 3 */
+    const Bit32u is_data = port & 1;            /* odd ports carry data, even ports an address */
+    /* Latch the byte; bit 8 of the latched value records that a high port was used. */
+    chip->write_data = (high_port ? 0x100 : 0) | data;
+    if (is_data)
+    {
+        chip->write_d |= 1;     /* Data */
+    }
+    else
+    {
+        chip->write_a |= 1;     /* Address */
+    }
+}
+
+void OPN2_SetTestPin(ym3438_t *chip, Bit32u value)
+{   /* Only the lowest bit of value drives the test input pin. */
+    chip->pin_test_in = (value & 1) ? 1 : 0;
+}
+
+Bit32u OPN2_ReadTestPin(ym3438_t *chip)
+{   /* Returns 1 only on cycle 23 while mode_test_2c[7] is set; 0 in every other case. */
+    if (chip->mode_test_2c[7] && chip->cycles == 23)
+    {
+        return 1;
+    }
+    return 0;
+}
+
+Bit32u OPN2_ReadIRQPin(ym3438_t *chip)
+{   /* Non-zero when either timer's overflow flag is set (bitwise OR of the two flags). */
+    return chip->timer_a_overflow_flag | chip->timer_b_overflow_flag;
+}
+
+Bit8u OPN2_Read(ym3438_t *chip, Bit32u port)
+{   /* Read the status byte (or test data when mode_test_21[6] is set). Ports 1-3 return 0 unless the chip type is the ASIC variant. */
+    if ((port & 3) == 0 || chip_type == ym3438_type_asic)
+    {
+        if (chip->mode_test_21[6])
+        {
+            /* Read test data */
+            Bit32u slot = (chip->cycles + 18) % 24;
+            Bit16u testdata = ((chip->pg_read & 0x01) << 15)
+                            | ((chip->eg_read[chip->mode_test_21[0]] & 0x01) << 14);
+            if (chip->mode_test_2c[4]) /* low bits carry either the channel read value or the operator output */
+            {
+                testdata |= chip->ch_read & 0x1ff;
+            }
+            else
+            {
+                testdata |= chip->fm_out[slot] & 0x3fff;
+            }
+            if (chip->mode_test_21[7]) /* selects which byte of the 16-bit test word is returned */
+            {
+                return testdata & 0xff;
+            }
+            else
+            {
+                return testdata >> 8;
+            }
+        }
+        else
+        {
+            return (Bit8u)(chip->busy << 7) | (Bit8u)(chip->timer_b_overflow_flag << 1)
+                    | (Bit8u)chip->timer_a_overflow_flag; /* bit 7 busy, bit 1 timer B flag, bit 0 timer A flag */
+        }
+    }
+    return 0;
+}
+
+void OPN2_WriteBuffered(ym3438_t *chip, Bit32u port, Bit8u data)
+{   /* Queue a register write in the ring buffer with a timestamp; OPN2_Generate applies it once its time is reached. */
+    Bit64u time1, time2;
+    Bit16s buffer[2];
+    Bit64u skip;
+    /* Bit 0x04 of port marks a queued entry that has not been applied; if set here the ring is full */
+    if (chip->writebuf[chip->writebuf_last].port & 0x04)
+    {
+        OPN2_Write(chip, chip->writebuf[chip->writebuf_last].port & 0X03,
+                   chip->writebuf[chip->writebuf_last].data);
+
+        chip->writebuf_cur = (chip->writebuf_last + 1) % OPN_WRITEBUF_SIZE;
+        skip = chip->writebuf[chip->writebuf_last].time - chip->writebuf_samplecnt;
+        chip->writebuf_samplecnt = chip->writebuf[chip->writebuf_last].time;
+        while (skip--) /* clock the chip forward to that entry's time; the produced samples are discarded */
+        {
+            OPN2_Clock(chip, buffer);
+        }
+    }
+
+    chip->writebuf[chip->writebuf_last].port = (port & 0x03) | 0x04;
+    chip->writebuf[chip->writebuf_last].data = data;
+    time1 = chip->writebuf_lasttime + OPN_WRITEBUF_DELAY;
+    time2 = chip->writebuf_samplecnt;
+    /* Schedule OPN_WRITEBUF_DELAY after the previous write, but never earlier than the current clock count */
+    if (time1 < time2)
+    {
+        time1 = time2;
+    }
+
+    chip->writebuf[chip->writebuf_last].time = time1;
+    chip->writebuf_lasttime = time1;
+    chip->writebuf_last = (chip->writebuf_last + 1) % OPN_WRITEBUF_SIZE;
+}
+
+void OPN2_Generate(ym3438_t *chip, Bit16s *buf)
+{   /* Run 24 clocks, sum the unmuted outputs into buf[0] (left) and buf[1] (right), and apply queued writes as they fall due. */
+    Bit32u i;
+    Bit16s buffer[2];
+    Bit32u mute;
+
+    buf[0] = 0;
+    buf[1] = 0;
+
+    for (i = 0; i < 24; i++)
+    {
+        switch (chip->cycles >> 2) /* each group of four cycles outputs one channel; look up its mute flag */
+        {
+        case 0: /* Ch 2 */
+            mute = chip->mute[1];
+            break;
+        case 1: /* Ch 6, DAC */
+            mute = chip->mute[5 + chip->dacen]; /* index 6 is used instead of 5 while dacen is 1 */
+            break;
+        case 2: /* Ch 4 */
+            mute = chip->mute[3];
+            break;
+        case 3: /* Ch 1 */
+            mute = chip->mute[0];
+            break;
+        case 4: /* Ch 5 */
+            mute = chip->mute[4];
+            break;
+        case 5: /* Ch 3 */
+            mute = chip->mute[2];
+            break;
+        default:
+            mute = 0;
+            break;
+        }
+        OPN2_Clock(chip, buffer);
+        if (!mute)
+        {
+            buf[0] += buffer[0];
+            buf[1] += buffer[1];
+        }
+        /* Apply every queued write whose timestamp has been reached */
+        while (chip->writebuf[chip->writebuf_cur].time <= chip->writebuf_samplecnt)
+        {
+            if (!(chip->writebuf[chip->writebuf_cur].port & 0x04))
+            {
+                break; /* entry already applied or never filled: the queue is empty */
+            }
+            chip->writebuf[chip->writebuf_cur].port &= 0x03;
+            OPN2_Write(chip, chip->writebuf[chip->writebuf_cur].port,
+                       chip->writebuf[chip->writebuf_cur].data);
+            chip->writebuf_cur = (chip->writebuf_cur + 1) % OPN_WRITEBUF_SIZE;
+        }
+        chip->writebuf_samplecnt++; /* counts clocks, not output samples */
+    }
+}
+
+void OPN2_GenerateResampled(ym3438_t *chip, Bit16s *buf)
+{   /* Produce one output pair at the host rate by linear interpolation between the two most recent chip samples. */
+    Bit16s buffer[2];
+    /* Generate chip samples until the fractional position falls inside the current interval */
+    while (chip->samplecnt >= chip->rateratio)
+    {
+        chip->oldsamples[0] = chip->samples[0];
+        chip->oldsamples[1] = chip->samples[1];
+        OPN2_Generate(chip, buffer);
+        chip->samples[0] = buffer[0] * 11; /* fixed gain of 11; the interpolated result is halved below */
+        chip->samples[1] = buffer[1] * 11;
+        chip->samplecnt -= chip->rateratio;
+    }
+    buf[0] = (Bit16s)(((chip->oldsamples[0] * (chip->rateratio - chip->samplecnt)
+                     + chip->samples[0] * chip->samplecnt) / chip->rateratio)>>1);
+    buf[1] = (Bit16s)(((chip->oldsamples[1] * (chip->rateratio - chip->samplecnt)
+                     + chip->samples[1] * chip->samplecnt) / chip->rateratio)>>1);
+    chip->samplecnt += 1 << RSM_FRAC; /* advance by one output sample in RSM_FRAC fixed point */
+}
+
+void OPN2_GenerateStream(ym3438_t *chip, Bit16s *output, Bit32u numsamples)
+{
+    /* Writes numsamples interleaved left/right pairs, replacing whatever output held. */
+    Bit16s frame[2];
+    while (numsamples-- > 0)
+    {
+        OPN2_GenerateResampled(chip, frame);
+        output[0] = frame[0];
+        output[1] = frame[1];
+        output += 2;
+    }
+}
+
+void OPN2_GenerateStreamMix(ym3438_t *chip, Bit16s *output, Bit32u numsamples)
+{
+    /* Adds numsamples interleaved left/right pairs onto the samples already in output. */
+    Bit16s frame[2];
+    while (numsamples-- > 0)
+    {
+        OPN2_GenerateResampled(chip, frame);
+        output[0] += frame[0];
+        output[1] += frame[1];
+        output += 2;
+    }
+}
+
+
+void OPN2_SetOptions(Bit8u flags)
+{   /* Bits 3-4 of flags select the chip variant; 0 and the unassigned value 3 both select the YM2612. */
+    Bit8u variant = (flags >> 3) & 0x03;
+    if (variant == 0x01)
+    {
+        OPN2_SetChipType(ym3438_type_asic);       /* ASIC YM3438 */
+    }
+    else if (variant == 0x02)
+    {
+        OPN2_SetChipType(ym3438_type_discrete);   /* Discrete YM3438 */
+    }
+    else
+    {
+        OPN2_SetChipType(ym3438_type_ym2612);     /* YM2612 */
+    }
+}
+
+void OPN2_SetMute(ym3438_t *chip, Bit32u mute)
+{   /* Unpack the low 7 mask bits into the mute flags; OPN2_Generate reads index 6 for channel 6 while the DAC is enabled. */
+    Bit32u voice;
+    for (voice = 7; voice-- > 0; )
+    {
+        chip->mute[voice] = (mute & (1u << voice)) != 0;
+    }
+}
+
+
+} // Ym2612_NukedImpl
+
+
+Ym2612_Nuked_Emu::Ym2612_Nuked_Emu() : impl( 0 ), prev_sample_rate( 0 ), prev_clock_rate( 0 )
+{	// Rates start at zero and the chip state is zero-filled, so reset() before set_rate() never reads indeterminate values
+	Ym2612_NukedImpl::OPN2_SetChipType( Ym2612_NukedImpl::ym3438_type_asic ); // chip type is shared by all instances
+	impl = new Ym2612_NukedImpl::ym3438_t(); // "()" value-initializes the state structure
+}
+
+Ym2612_Nuked_Emu::~Ym2612_Nuked_Emu()
+{
+	// impl is stored as an opaque void*; restore the concrete chip type before freeing (deleting a null pointer is a no-op)
+	delete static_cast<Ym2612_NukedImpl::ym3438_t*>(impl);
+}
+
+const char *Ym2612_Nuked_Emu::set_rate(double sample_rate, double clock_rate)
+{	// Remembers both rates for reset(), then reinitializes the chip with them; returns 0 on success or an error string
+	if ( impl == 0 )
+		return "Out of memory";
+	Ym2612_NukedImpl::ym3438_t *const chip = static_cast<Ym2612_NukedImpl::ym3438_t*>(impl);
+	prev_clock_rate = clock_rate;
+	prev_sample_rate = sample_rate;
+	Ym2612_NukedImpl::OPN2_Reset( chip, static_cast<Bit32u>(sample_rate), static_cast<Bit32u>(clock_rate) );
+	return 0;
+}
+
+void Ym2612_Nuked_Emu::reset()
+{	// Re-run the core reset with the rates remembered by set_rate(); skipped only when no chip is allocated
+	Ym2612_NukedImpl::ym3438_t *chip_r = reinterpret_cast<Ym2612_NukedImpl::ym3438_t*>(impl);
+	if ( chip_r ) Ym2612_NukedImpl::OPN2_Reset( chip_r, static_cast<Bit32u>(prev_sample_rate), static_cast<Bit32u>(prev_clock_rate) );
+}
+
+void Ym2612_Nuked_Emu::mute_voices(int mask)
+{
+	if ( !impl ) return; // no chip allocated: nothing to mute
+	Ym2612_NukedImpl::OPN2_SetMute( static_cast<Ym2612_NukedImpl::ym3438_t*>(impl), mask );
+}
+
+void Ym2612_Nuked_Emu::write0(int addr, int data)
+{	// Queue the address byte on port 0, then the value byte on port 1, through the timestamped write buffer
+	Ym2612_NukedImpl::ym3438_t *const chip = static_cast<Ym2612_NukedImpl::ym3438_t*>(impl);
+	if ( chip == 0 ) return;
+	Ym2612_NukedImpl::OPN2_WriteBuffered( chip, 0, static_cast<Bit8u>(addr) );
+	Ym2612_NukedImpl::OPN2_WriteBuffered( chip, 1, static_cast<Bit8u>(data) );
+}
+
+void Ym2612_Nuked_Emu::write1(int addr, int data)
+{	// Queue the address byte on port 2, then the value byte on port 3, through the timestamped write buffer
+	Ym2612_NukedImpl::ym3438_t *const chip = static_cast<Ym2612_NukedImpl::ym3438_t*>(impl);
+	if ( chip == 0 ) return;
+	Ym2612_NukedImpl::OPN2_WriteBuffered( chip, 2, static_cast<Bit8u>(addr) );
+	Ym2612_NukedImpl::OPN2_WriteBuffered( chip, 3, static_cast<Bit8u>(data) );
+}
+
+void Ym2612_Nuked_Emu::run(int pair_count, Ym2612_Nuked_Emu::sample_t *out)
+{	// Render pair_count resampled stereo pairs into out; OPN2_GenerateStream assigns to the buffer
+	Ym2612_NukedImpl::ym3438_t *const chip = static_cast<Ym2612_NukedImpl::ym3438_t*>(impl);
+	if ( chip == 0 ) return;
+	Ym2612_NukedImpl::OPN2_GenerateStream( chip, out, pair_count );
+}
diff --git a/libraries/game-music-emu/gme/Ym2612_Nuked.h b/libraries/game-music-emu/gme/Ym2612_Nuked.h
new file mode 100644
index 000000000..6c265b138
--- /dev/null
+++ b/libraries/game-music-emu/gme/Ym2612_Nuked.h
@@ -0,0 +1,41 @@
+// YM2612 FM sound chip emulator interface
+
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+#ifndef YM2612_EMU_H
+#define YM2612_EMU_H
+
+typedef void Ym2612_Nuked_Impl;
+
+class Ym2612_Nuked_Emu  {
+	Ym2612_Nuked_Impl* impl; // opaque pointer to the emulator core state; allocated in the constructor, freed in the destructor
+	double prev_sample_rate; // rates passed to the last set_rate() call, reused by reset()
+	double prev_clock_rate;
+public:
+	Ym2612_Nuked_Emu();
+	~Ym2612_Nuked_Emu();
+
+	// Set output sample rate and chip clock rates, in Hz. Returns non-zero
+	// if error. Also resets the chip.
+	const char* set_rate( double sample_rate, double clock_rate );
+
+	// Reset to power-up state
+	void reset();
+
+	// Mute voice n if bit n (1 << n) of mask is set
+	enum { channel_count = 6 };
+	void mute_voices( int mask );
+
+	// Write addr to register 0 then data to register 1
+	void write0( int addr, int data );
+
+	// Write addr to register 2 then data to register 3
+	void write1( int addr, int data );
+
+	// Run and write pair_count stereo sample pairs to out. NOTE(review): the implementation assigns samples to out rather than adding to its existing contents - confirm callers do not rely on mixing
+	typedef short sample_t;
+	enum { out_chan_count = 2 }; // stereo
+	void run( int pair_count, sample_t* out );
+};
+
+#endif
+
diff --git a/libraries/game-music-emu/gme/blargg_common.h b/libraries/game-music-emu/gme/blargg_common.h
new file mode 100644
index 000000000..13cc2417e
--- /dev/null
+++ b/libraries/game-music-emu/gme/blargg_common.h
@@ -0,0 +1,160 @@
+// Sets up common environment for Shay Green's libraries.
+// To change configuration options, modify blargg_config.h, not this file.
+
+#ifndef BLARGG_COMMON_H
+#define BLARGG_COMMON_H
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <limits.h>
+
+#undef BLARGG_COMMON_H
+// allow blargg_config.h to #include blargg_common.h
+#include "blargg_config.h"
+#ifndef BLARGG_COMMON_H
+#define BLARGG_COMMON_H
+
+// BLARGG_RESTRICT: equivalent to restrict, where supported
+#if __GNUC__ >= 3 || _MSC_VER >= 1100
+	#define BLARGG_RESTRICT __restrict
+#else
+	#define BLARGG_RESTRICT
+#endif
+
+// STATIC_CAST(T,expr): Used in place of static_cast<T> (expr)
+#ifndef STATIC_CAST
+	#define STATIC_CAST(T,expr) ((T) (expr))
+#endif
+
+// blargg_err_t (0 on success, otherwise error string)
+#ifndef blargg_err_t
+	typedef const char* blargg_err_t;
+#endif
+
+// blargg_vector - very lightweight vector of POD types (no constructor/destructor)
+template<class T>
+class blargg_vector {
+	T* begin_; // malloc-family storage, or 0 when empty
+	size_t size_; // number of elements
+public:
+	blargg_vector() : begin_( 0 ), size_( 0 ) { }
+	~blargg_vector() { free( begin_ ); }
+	size_t size() const { return size_; }
+	T* begin() const { return begin_; }
+	T* end() const { return begin_ + size_; }
+	blargg_err_t resize( size_t n ) // returns 0 on success; on failure the old contents and size are kept
+	{
+		void* p = (n > (size_t) -1 / sizeof (T)) ? 0 : realloc( begin_, n * sizeof (T) ); // reject byte counts that would wrap size_t
+		if ( !p && n )
+			return "Out of memory";
+		begin_ = (T*) p;
+		size_ = n;
+		return 0;
+	}
+	void clear() { void* p = begin_; begin_ = 0; size_ = 0; free( p ); }
+	T& operator [] ( size_t n ) const
+	{
+		assert( n <= size_ ); // <= to allow past-the-end value
+		return begin_ [n];
+	}
+};
+
+#ifndef BLARGG_DISABLE_NOTHROW
+	// throw spec mandatory in ISO C++ if operator new can return NULL
+	#if __cplusplus >= 199711 || __GNUC__ >= 3
+		#define BLARGG_THROWS( spec ) throw spec
+	#else
+		#define BLARGG_THROWS( spec )
+	#endif
+	#define BLARGG_DISABLE_NOTHROW \
+		void* operator new ( size_t s ) BLARGG_THROWS(()) { return malloc( s ); }\
+		void operator delete ( void* p ) { free( p ); }
+	#define BLARGG_NEW new
+#else
+	#include <new>
+	#define BLARGG_NEW new (std::nothrow)
+#endif
+
+// BLARGG_4CHAR('a','b','c','d') = 'abcd' (four character integer constant); arguments may be arbitrary expressions
+#define BLARGG_4CHAR( a, b, c, d ) \
+	(((a)&0xFF)*0x1000000L + ((b)&0xFF)*0x10000L + ((c)&0xFF)*0x100L + ((d)&0xFF))
+// BLARGG_2CHAR('a','b') = 'ab' (two character integer constant)
+#define BLARGG_2CHAR( a, b ) \
+	(((a)&0xFF)*0x100L + ((b)&0xFF))
+
+// BOOST_STATIC_ASSERT( expr ): Generates compile error if expr is 0.
+#ifndef BOOST_STATIC_ASSERT
+	#ifdef _MSC_VER
+		// MSVC6 (_MSC_VER < 1300) fails for use of __LINE__ when /Zl is specified
+		#define BOOST_STATIC_ASSERT( expr ) \
+			void blargg_failed_( int (*arg) [2 / (int) !!(expr) - 1] )
+	#else
+		// Some other compilers fail when declaring same function multiple times in class,
+		// so differentiate them by line
+		#define BOOST_STATIC_ASSERT( expr ) \
+			void blargg_failed_( int (*arg) [2 / !!(expr) - 1] [__LINE__] )
+	#endif
+#endif
+
+// BLARGG_COMPILER_HAS_BOOL: If 0, provides bool support for old compiler. If 1,
+// compiler is assumed to support bool. If undefined, availability is determined.
+#ifndef BLARGG_COMPILER_HAS_BOOL
+	#if defined (__MWERKS__)
+		#if !__option(bool)
+			#define BLARGG_COMPILER_HAS_BOOL 0
+		#endif
+	#elif defined (_MSC_VER)
+		#if _MSC_VER < 1100
+			#define BLARGG_COMPILER_HAS_BOOL 0
+		#endif
+	#elif defined (__GNUC__)
+		// supports bool
+	#elif __cplusplus < 199711
+		#define BLARGG_COMPILER_HAS_BOOL 0
+	#endif
+#endif
+#if defined (BLARGG_COMPILER_HAS_BOOL) && !BLARGG_COMPILER_HAS_BOOL
+	// If you get errors here, modify your blargg_config.h file
+	typedef int bool;
+	const bool true  = 1;
+	const bool false = 0;
+#endif
+
+// blargg_long/blargg_ulong = at least 32 bits, int if it's big enough
+
+#if INT_MAX < 0x7FFFFFFF || LONG_MAX == 0x7FFFFFFF
+	typedef long blargg_long;
+#else
+	typedef int blargg_long;
+#endif
+
+#if UINT_MAX < 0xFFFFFFFF || ULONG_MAX == 0xFFFFFFFF
+	typedef unsigned long blargg_ulong;
+#else
+	typedef unsigned blargg_ulong;
+#endif
+
+// int8_t etc.
+
+// TODO: Add CMake check for this, although I'd likely just point affected
+// persons to a real compiler...
+#if 1 || defined (HAVE_STDINT_H)
+	#include <stdint.h>
+#endif
+
+#if __GNUC__ >= 3
+	#define BLARGG_DEPRECATED __attribute__ ((deprecated))
+#else
+	#define BLARGG_DEPRECATED
+#endif
+
+// Use in place of "= 0;" for a pure virtual, since these cause calls to std C++ lib.
+// During development, BLARGG_PURE( x ) expands to = 0;
+// virtual int func() BLARGG_PURE( { return 0; } )
+#ifndef BLARGG_PURE
+	#define BLARGG_PURE( def ) def
+#endif
+
+#endif
+#endif
diff --git a/libraries/game-music-emu/gme/blargg_config.h b/libraries/game-music-emu/gme/blargg_config.h
new file mode 100644
index 000000000..377dd2d8c
--- /dev/null
+++ b/libraries/game-music-emu/gme/blargg_config.h
@@ -0,0 +1,43 @@
+// Library configuration. Modify this file as necessary.
+
+#ifndef BLARGG_CONFIG_H
+#define BLARGG_CONFIG_H
+
+// Uncomment to use zlib for transparent decompression of gzipped files
+//#define HAVE_ZLIB_H
+
+// Uncomment and edit list to support only the listed game music types,
+// so that the others don't get linked in at all.
+/*
+#define GME_TYPE_LIST \
+	gme_ay_type,\
+	gme_gbs_type,\
+	gme_gym_type,\
+	gme_hes_type,\
+	gme_kss_type,\
+	gme_nsf_type,\
+	gme_nsfe_type,\
+	gme_sap_type,\
+	gme_spc_type,\
+	gme_vgm_type,\
+	gme_vgz_type
+*/
+
+// Uncomment to enable platform-specific optimizations
+//#define BLARGG_NONPORTABLE 1
+
+// Uncomment to use faster, lower quality sound synthesis
+//#define BLIP_BUFFER_FAST 1
+
+// Uncomment if automatic byte-order determination doesn't work
+//#define BLARGG_BIG_ENDIAN 1
+
+// Uncomment if you get errors in the bool section of blargg_common.h
+//#define BLARGG_COMPILER_HAS_BOOL 1
+
+// Use standard config.h if present
+#ifdef HAVE_CONFIG_H
+	#include "config.h"
+#endif
+
+#endif
diff --git a/libraries/game-music-emu/gme/blargg_endian.h b/libraries/game-music-emu/gme/blargg_endian.h
new file mode 100644
index 000000000..46e58e2f0
--- /dev/null
+++ b/libraries/game-music-emu/gme/blargg_endian.h
@@ -0,0 +1,184 @@
+// CPU Byte Order Utilities
+
+#ifndef BLARGG_ENDIAN
+#define BLARGG_ENDIAN
+
+#include "blargg_common.h"
+
+// BLARGG_CPU_CISC: Defined if CPU has very few general-purpose registers (< 16)
+#if defined (__i386__) || defined (__x86_64__) || defined (_M_IX86) || defined (_M_X64)
+	#define BLARGG_CPU_X86 1
+	#define BLARGG_CPU_CISC 1
+#endif
+
+#if defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__) || \
+		defined (__POWERPC__) || defined (__powerc)
+	#define BLARGG_CPU_POWERPC 1
+	#define BLARGG_CPU_RISC 1
+#endif
+
+// BLARGG_BIG_ENDIAN, BLARGG_LITTLE_ENDIAN: Determined automatically, otherwise only
+// one may be #defined to 1. Only needed if something actually depends on byte order.
+#if !defined (BLARGG_BIG_ENDIAN) && !defined (BLARGG_LITTLE_ENDIAN)
+#ifdef __GLIBC__
+	// glibc's <endian.h> defines __BYTE_ORDER for us
+	#include <endian.h>
+	#if __BYTE_ORDER == __LITTLE_ENDIAN
+		#define BLARGG_LITTLE_ENDIAN 1
+	#elif __BYTE_ORDER == __BIG_ENDIAN
+		#define BLARGG_BIG_ENDIAN 1
+	#endif
+#else
+
+#if defined (LSB_FIRST) || defined (__LITTLE_ENDIAN__) || BLARGG_CPU_X86 || \
+		(defined (LITTLE_ENDIAN) && LITTLE_ENDIAN+0 != 1234)
+	#define BLARGG_LITTLE_ENDIAN 1
+#endif
+
+#if defined (MSB_FIRST)     || defined (__BIG_ENDIAN__) || defined (WORDS_BIGENDIAN) || \
+	defined (__sparc__)     ||  BLARGG_CPU_POWERPC || \
+	(defined (BIG_ENDIAN) && BIG_ENDIAN+0 != 4321)
+	#define BLARGG_BIG_ENDIAN 1
+#elif !defined (__mips__)
+	// No endian specified; assume little-endian, since it's most common
+	#define BLARGG_LITTLE_ENDIAN 1
+#endif
+#endif
+#endif
+
+#if BLARGG_LITTLE_ENDIAN && BLARGG_BIG_ENDIAN
+	#undef BLARGG_LITTLE_ENDIAN
+	#undef BLARGG_BIG_ENDIAN
+#endif
+
+inline void blargg_verify_byte_order() // debug-only check that the BLARGG_*_ENDIAN setting matches the running CPU
+{
+	#ifndef NDEBUG
+		#if BLARGG_BIG_ENDIAN
+			volatile int i = 1;
+			assert( *(volatile char*) &i == 0 ); // big-endian: first byte of int 1 must be zero
+		#elif BLARGG_LITTLE_ENDIAN
+			volatile int i = 1;
+			assert( *(volatile char*) &i != 0 ); // little-endian: first byte of int 1 must be nonzero
+		#endif
+	#endif
+}
+
+inline unsigned get_le16( void const* p )
+{
+	unsigned char const* const bytes = (unsigned char const*) p; // p [0] = low byte, p [1] = high byte
+	return (unsigned) bytes [0] | (unsigned) bytes [1] << 8;
+}
+
+inline unsigned get_be16( void const* p )
+{
+	unsigned char const* const bytes = (unsigned char const*) p; // p [0] = high byte, p [1] = low byte
+	return (unsigned) bytes [1] | (unsigned) bytes [0] << 8;
+}
+
+inline blargg_ulong get_le32( void const* p )
+{
+	unsigned char const* const bytes = (unsigned char const*) p; // least significant byte comes first
+	blargg_ulong const lo = (blargg_ulong) bytes [0] | (blargg_ulong) bytes [1] << 8;
+	blargg_ulong const hi = (blargg_ulong) bytes [2] | (blargg_ulong) bytes [3] << 8;
+	return hi << 16 | lo;
+}
+
+inline blargg_ulong get_be32( void const* p )
+{
+	unsigned char const* const bytes = (unsigned char const*) p; // most significant byte comes first
+	blargg_ulong const hi = (blargg_ulong) bytes [0] << 8 | (blargg_ulong) bytes [1];
+	blargg_ulong const lo = (blargg_ulong) bytes [2] << 8 | (blargg_ulong) bytes [3];
+	return hi << 16 | lo;
+}
+
+inline void set_le16( void* p, unsigned n )
+{
+	((unsigned char*) p) [0] = (unsigned char) (n & 0xFF);        // low byte first
+	((unsigned char*) p) [1] = (unsigned char) ((n >> 8) & 0xFF); // then high byte
+}
+
+inline void set_be16( void* p, unsigned n )
+{
+	((unsigned char*) p) [1] = (unsigned char) (n & 0xFF);        // low byte goes last
+	((unsigned char*) p) [0] = (unsigned char) ((n >> 8) & 0xFF); // high byte goes first
+}
+
+inline void set_le32( void* p, blargg_ulong n )
+{
+	unsigned char* const bytes = (unsigned char*) p;
+	// Emit the four low-order bytes of n, least significant first
+	for ( int i = 0; i < 4; i++ )
+		bytes [i] = (unsigned char) (n >> (8 * i));
+}
+
+inline void set_be32( void* p, blargg_ulong n )
+{
+	unsigned char* const bytes = (unsigned char*) p;
+	// Emit the four low-order bytes of n, most significant first
+	for ( int i = 0; i < 4; i++ )
+		bytes [i] = (unsigned char) (n >> (24 - 8 * i));
+}
+
+#if BLARGG_NONPORTABLE
+	// Optimized implementation if byte order is known
+	#if BLARGG_LITTLE_ENDIAN
+		#define GET_LE16( addr )        (*(uint16_t*) (addr))
+		#define GET_LE32( addr )        (*(uint32_t*) (addr))
+		#define SET_LE16( addr, data )  (void) (*(uint16_t*) (addr) = (data))
+		#define SET_LE32( addr, data )  (void) (*(uint32_t*) (addr) = (data))
+	#elif BLARGG_BIG_ENDIAN
+		#define GET_BE16( addr )        (*(uint16_t*) (addr))
+		#define GET_BE32( addr )        (*(uint32_t*) (addr))
+		#define SET_BE16( addr, data )  (void) (*(uint16_t*) (addr) = (data))
+		#define SET_BE32( addr, data )  (void) (*(uint32_t*) (addr) = (data))
+		
+		#if BLARGG_CPU_POWERPC
+			// PowerPC has special byte-reversed instructions
+			#if defined (__MWERKS__)
+				#define GET_LE16( addr )        (__lhbrx( addr, 0 ))
+				#define GET_LE32( addr )        (__lwbrx( addr, 0 ))
+				#define SET_LE16( addr, in )    (__sthbrx( in, addr, 0 ))
+				#define SET_LE32( addr, in )    (__stwbrx( in, addr, 0 ))
+			#elif defined (__GNUC__)
+				#define GET_LE16( addr )        ({unsigned short ppc_lhbrx_; __asm__ volatile( "lhbrx %0,0,%1" : "=r" (ppc_lhbrx_) : "r" (addr) : "memory" ); ppc_lhbrx_;})
+				#define GET_LE32( addr )        ({uint32_t ppc_lwbrx_; __asm__ volatile( "lwbrx %0,0,%1" : "=r" (ppc_lwbrx_) : "r" (addr) : "memory" ); ppc_lwbrx_;}) // 32-bit temporary: a short would drop the upper half of the loaded word
+				#define SET_LE16( addr, in )    ({__asm__ volatile( "sthbrx %0,0,%1" : : "r" (in), "r" (addr) : "memory" );})
+				#define SET_LE32( addr, in )    ({__asm__ volatile( "stwbrx %0,0,%1" : : "r" (in), "r" (addr) : "memory" );})
+			#endif
+		#endif
+	#endif
+#endif
+
+#ifndef GET_LE16
+	#define GET_LE16( addr )        get_le16( addr )
+	#define SET_LE16( addr, data )  set_le16( addr, data )
+#endif
+
+#ifndef GET_LE32
+	#define GET_LE32( addr )        get_le32( addr )
+	#define SET_LE32( addr, data )  set_le32( addr, data )
+#endif
+
+#ifndef GET_BE16
+	#define GET_BE16( addr )        get_be16( addr )
+	#define SET_BE16( addr, data )  set_be16( addr, data )
+#endif
+
+#ifndef GET_BE32
+	#define GET_BE32( addr )        get_be32( addr )
+	#define SET_BE32( addr, data )  set_be32( addr, data )
+#endif
+
+// auto-selecting versions: overload resolution on the pointer type (uint16_t* vs uint32_t*) picks the 16- or 32-bit accessor
+
+inline void set_le( uint16_t* p, unsigned     n ) { SET_LE16( p, n ); }
+inline void set_le( uint32_t* p, blargg_ulong n ) { SET_LE32( p, n ); }
+inline void set_be( uint16_t* p, unsigned     n ) { SET_BE16( p, n ); }
+inline void set_be( uint32_t* p, blargg_ulong n ) { SET_BE32( p, n ); }
+inline unsigned     get_le( uint16_t* p ) { return GET_LE16( p ); }
+inline blargg_ulong get_le( uint32_t* p ) { return GET_LE32( p ); }
+inline unsigned     get_be( uint16_t* p ) { return GET_BE16( p ); }
+inline blargg_ulong get_be( uint32_t* p ) { return GET_BE32( p ); }
+
+#endif
diff --git a/libraries/game-music-emu/gme/blargg_source.h b/libraries/game-music-emu/gme/blargg_source.h
new file mode 100644
index 000000000..b65afd30b
--- /dev/null
+++ b/libraries/game-music-emu/gme/blargg_source.h
@@ -0,0 +1,123 @@
+/* Included at the beginning of library source files, after all other #include lines.
+Sets up helpful macros and services used in my source code. They don't need
+an annoying module prefix on their names since they are defined after
+all other #include lines. */
+
+#ifndef BLARGG_SOURCE_H
+#define BLARGG_SOURCE_H
+
+// If debugging is enabled, abort program if expr is false. Meant for checking
+// internal state and consistency. A failed assertion indicates a bug in the module.
+// void assert( bool expr );
+#include <assert.h>
+
+// If debugging is enabled and expr is false, abort program. Meant for checking
+// caller-supplied parameters and operations that are outside the control of the
+// module. A failed requirement indicates a bug outside the module.
+// void require( bool expr );
+#undef require
+#define require( expr ) assert( expr )
+
+// Use to provide hints to compiler for optimized code layout in situations where we
+// can almost always expect a conditional to go one way or the other.  Should only be
+// used in situations where an unexpected branch is truly exceptional though!
+#undef likely
+#undef unlikely
+#ifdef __GNUC__
+    #define likely( x ) __builtin_expect(x, 1)
+    #define unlikely( x ) __builtin_expect(x, 0)
+#else
+    #define likely( x ) (x)
+    #define unlikely( x ) (x)
+#endif
+
+// Like printf() except output goes to debug log file. Might be defined to do
+// nothing (not even evaluate its arguments).
+// void debug_printf( const char* format, ... );
+static inline void blargg_dprintf_( const char*, ... ) { } // no-op sink; exists only so the disabled call below type-checks
+#undef debug_printf
+#define debug_printf (1) ? (void) 0 : blargg_dprintf_ // constant-true ?: selects (void) 0, so the call and its arguments are never evaluated
+
+// If enabled, evaluate expr and if false, make debug log entry with source file
+// and line. Meant for finding situations that should be examined further, but that
+// don't indicate a problem. In all cases, execution continues normally.
+#undef check
+#define check( expr ) ((void) 0)
+
+// If expr yields error string, return it from current function, otherwise continue.
+#undef RETURN_ERR
+#define RETURN_ERR( expr ) do {                         \
+		blargg_err_t blargg_return_err_ = (expr);               \
+		if ( blargg_return_err_ ) return blargg_return_err_;    \
+	} while ( 0 )
+
+// If ptr is 0, return out of memory error string.
+#undef CHECK_ALLOC
+#define CHECK_ALLOC( ptr ) do { if ( (ptr) == 0 ) return "Out of memory"; } while ( 0 )
+
+// Avoid any macros which evaluate their arguments multiple times
+#undef min
+#undef max
+
+#define DEF_MIN_MAX( type ) \
+	static inline type min( type x, type y ) { return x < y ? x : y; }\
+	static inline type max( type x, type y ) { return y < x ? x : y; }
+
+DEF_MIN_MAX( int ) // instantiates min()/max() taking and returning int by value
+DEF_MIN_MAX( unsigned )
+DEF_MIN_MAX( long )
+DEF_MIN_MAX( unsigned long )
+DEF_MIN_MAX( float )
+DEF_MIN_MAX( double )
+
+#undef DEF_MIN_MAX
+
+/*
+// using const references generates crappy code, and I am currently only using these
+// for built-in types, so they take arguments by value
+
+// TODO: remove
+inline int min( int x, int y ) 
+template<class T>
+inline T min( T x, T y )
+{
+	if ( x < y )
+		return x;
+	return y;
+}
+
+template<class T>
+inline T max( T x, T y )
+{
+	if ( x < y )
+		return y;
+	return x;
+}
+*/
+
+// TODO: good idea? bad idea?
+#undef byte
+#define byte byte_
+typedef unsigned char byte;
+
+// Setup compiler defines useful for exporting required public API symbols in gme.cpp
+#ifndef BLARGG_EXPORT
+    #if defined (_WIN32) && defined(BLARGG_BUILD_DLL)
+        #define BLARGG_EXPORT __declspec(dllexport)
+    #elif defined (LIBGME_VISIBILITY)
+        #define BLARGG_EXPORT __attribute__((visibility ("default")))
+    #else
+        #define BLARGG_EXPORT
+    #endif
+#endif
+
+// deprecated
+#define BLARGG_CHECK_ALLOC CHECK_ALLOC
+#define BLARGG_RETURN_ERR RETURN_ERR
+
+// BLARGG_SOURCE_BEGIN: If defined, #included, allowing redefinition of debug_printf and check
+#ifdef BLARGG_SOURCE_BEGIN
+	#include BLARGG_SOURCE_BEGIN
+#endif
+
+#endif
diff --git a/libraries/game-music-emu/gme/gb_cpu_io.h b/libraries/game-music-emu/gme/gb_cpu_io.h
new file mode 100644
index 000000000..8bd69aa2d
--- /dev/null
+++ b/libraries/game-music-emu/gme/gb_cpu_io.h
@@ -0,0 +1,72 @@
+
+#include "Gbs_Emu.h"
+
+#include "blargg_source.h"
+
+int Gbs_Emu::cpu_read( gb_addr_t addr ) // returns the value at addr; APU register addresses are answered by the APU instead
+{
+	int result = *cpu::get_code( addr ); // default: value fetched through cpu::get_code()
+	if ( unsigned (addr - Gb_Apu::start_addr) < Gb_Apu::register_count ) // unsigned wrap-around makes this a single range test
+		result = apu.read_register( clock(), addr );
+#ifndef NDEBUG
+	else if ( unsigned (addr - 0x8000) < 0x2000 || unsigned (addr - 0xE000) < 0x1F00 ) // $8000-$9FFF or $E000-$FEFF
+		debug_printf( "Read from unmapped memory $%.4x\n", (unsigned) addr );
+	else if ( unsigned (addr - 0xFF01) < 0xFF80 - 0xFF01 ) // $FF01-$FF7F
+		debug_printf( "Unhandled I/O read 0x%4X\n", (unsigned) addr );
+#endif
+	return result;
+}
+
+void Gbs_Emu::cpu_write( gb_addr_t addr, int data ) // routes a CPU write to ram, the APU, the timer, or set_bank() depending on addr
+{
+	unsigned offset = addr - ram_addr;
+	if ( offset <= 0xFFFF - ram_addr ) // addr lies in ram_addr..$FFFF (lower addresses wrap to a huge offset)
+	{
+		ram [offset] = data;
+		if ( (addr ^ 0xE000) <= 0x1F80 - 1 ) // addr lies in $E000-$FF7F
+		{
+			if ( unsigned (addr - Gb_Apu::start_addr) < Gb_Apu::register_count )
+			{
+				GME_APU_HOOK( this, addr - Gb_Apu::start_addr, data );
+				apu.write_register( clock(), addr, data );
+			}
+			else if ( (addr ^ 0xFF06) < 2 ) // addr is $FF06 or $FF07
+				update_timer();
+			else if ( addr == joypad_addr )
+				ram [offset] = 0; // keep joypad return value 0
+			else
+				ram [offset] = 0xFF; // every other address in this range is stored as $FF
+
+			//if ( addr == 0xFFFF )
+			//  debug_printf( "Wrote interrupt mask\n" );
+		}
+	}
+	else if ( (addr ^ 0x2000) <= 0x2000 - 1 ) // addr lies in $2000-$3FFF
+	{
+		set_bank( data );
+	}
+#ifndef NDEBUG
+	else if ( unsigned (addr - 0x8000) < 0x2000 || unsigned (addr - 0xE000) < 0x1F00 )
+	{
+		debug_printf( "Wrote to unmapped memory $%.4x\n", (unsigned) addr );
+	}
+#endif
+}
+
+#define CPU_READ_FAST( cpu, addr, time, out ) \
+	CPU_READ_FAST_( STATIC_CAST(Gbs_Emu*,cpu), addr, time, out )
+
+#define CPU_READ_FAST_( emu, addr, time, out ) \
+{\
+	out = READ_PROG( addr );\
+	if ( unsigned (addr - Gb_Apu::start_addr) < Gb_Apu::register_count )\
+		out = emu->apu.read_register( emu->cpu_time - time * clocks_per_instr, addr );\
+	else\
+		check( out == emu->cpu_read( addr ) );\
+}
+
+#define CPU_READ( cpu, addr, time ) \
+	STATIC_CAST(Gbs_Emu*,cpu)->cpu_read( addr )
+
+#define CPU_WRITE( cpu, addr, data, time ) \
+	STATIC_CAST(Gbs_Emu*,cpu)->cpu_write( addr, data )
diff --git a/libraries/game-music-emu/gme/gme.cpp b/libraries/game-music-emu/gme/gme.cpp
new file mode 100644
index 000000000..8558e0904
--- /dev/null
+++ b/libraries/game-music-emu/gme/gme.cpp
@@ -0,0 +1,420 @@
+// Game_Music_Emu https://bitbucket.org/mpyne/game-music-emu/
+
+#include "Music_Emu.h"
+
+#include "gme_types.h"
+#if !GME_DISABLE_STEREO_DEPTH
+#include "Effects_Buffer.h"
+#endif
+#include "blargg_endian.h"
+#include <string.h>
+#include <ctype.h>
+
+/* Copyright (C) 2003-2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include "blargg_source.h"
+
+BLARGG_EXPORT gme_type_t const* gme_type_list() // returns a static, null-terminated array of the compiled-in music types
+{
+	static gme_type_t const gme_type_list_ [] = {
+#ifdef GME_TYPE_LIST
+	GME_TYPE_LIST, // explicit list supplied by the build replaces the per-format switches below
+#else
+	#ifdef USE_GME_AY
+	            gme_ay_type,
+	#endif
+	#ifdef USE_GME_GBS
+	            gme_gbs_type,
+	#endif
+	#ifdef USE_GME_GYM
+	            gme_gym_type,
+	#endif
+	#ifdef USE_GME_HES
+	            gme_hes_type,
+	#endif
+	#ifdef USE_GME_KSS
+	            gme_kss_type,
+	#endif
+	#ifdef USE_GME_NSF
+	            gme_nsf_type,
+	#endif
+	#ifdef USE_GME_NSFE
+	            gme_nsfe_type,
+	#endif
+	#ifdef USE_GME_SAP
+	            gme_sap_type,
+	#endif
+	#ifdef USE_GME_SPC
+	            gme_spc_type,
+	#endif
+	#ifdef USE_GME_VGM
+	            gme_vgm_type,
+	            gme_vgz_type,
+	#endif
+#endif
+        0 // null entry marks the end of the list
+    };
+
+	return gme_type_list_;
+}
+
+BLARGG_EXPORT const char* gme_identify_header( void const* header ) // maps the first 4 bytes at header to a type name, or "" if unrecognized
+{
+	switch ( get_be32( header ) ) // big-endian read, so the first byte of the tag is the most significant
+	{
+		case BLARGG_4CHAR('Z','X','A','Y'):  return "AY";
+		case BLARGG_4CHAR('G','B','S',0x01): return "GBS";
+		case BLARGG_4CHAR('G','Y','M','X'):  return "GYM";
+		case BLARGG_4CHAR('H','E','S','M'):  return "HES";
+		case BLARGG_4CHAR('K','S','C','C'):
+		case BLARGG_4CHAR('K','S','S','X'):  return "KSS";
+		case BLARGG_4CHAR('N','E','S','M'):  return "NSF";
+		case BLARGG_4CHAR('N','S','F','E'):  return "NSFE";
+		case BLARGG_4CHAR('S','A','P',0x0D): return "SAP";
+		case BLARGG_4CHAR('S','N','E','S'):  return "SPC";
+		case BLARGG_4CHAR('V','g','m',' '):  return "VGM";
+	}
+	if (get_be16(header) == BLARGG_2CHAR(0x1F, 0x8B)) // two-byte tag only; presumably the gzip magic number (TODO confirm)
+		return "VGZ";
+	return "";
+}
+
+static void to_uppercase( const char* in, int len, char* out ) // upper-cases in into out; out becomes "" if in (with its NUL) needs more than len chars
+{
+	for ( int i = 0; i < len; i++ )
+	{
+		if ( !(out [i] = (char) toupper( (unsigned char) in [i] )) ) // cast first: toupper() is undefined for negative char values
+			return;
+	}
+	*out = 0; // extension too long
+}
+
+BLARGG_EXPORT gme_type_t gme_identify_extension( const char* extension_ )
+{
+	// Accept either a bare extension or a path: keep only what follows the last '.'
+	char const* const last_dot = strrchr( extension_, '.' );
+	char const* const ext = last_dot ? last_dot + 1 : extension_;
+
+	char upper [6]; // to_uppercase() leaves this empty when ext does not fit
+	to_uppercase( ext, sizeof upper, upper );
+
+	gme_type_t const* entry = gme_type_list();
+	while ( *entry && strcmp( upper, (*entry)->extension_ ) != 0 )
+		entry++;
+	return *entry; // the list's null terminator doubles as the "not found" result
+}
+
+BLARGG_EXPORT const char *gme_type_extension( gme_type_t music_type )
+{
+	// A null type yields an empty string rather than a null pointer
+	if ( !music_type )
+		return "";
+	return static_cast<const gme_type_t_ *>( music_type )->extension_;
+}
+
+BLARGG_EXPORT gme_err_t gme_identify_file( const char* path, gme_type_t* type_out )
+{
+	*type_out = gme_identify_extension( path );
+	// TODO: don't examine header if file has extension?
+	if ( *type_out )
+		return 0; // the extension alone identified the type
+	// Unknown extension: fall back to the first four bytes of the file
+	char magic [4];
+	GME_FILE_READER file;
+	RETURN_ERR( file.open( path ) );
+	RETURN_ERR( file.read( magic, sizeof magic ) );
+	*type_out = gme_identify_extension( gme_identify_header( magic ) );
+	return 0;
+}
+
+BLARGG_EXPORT gme_err_t gme_open_data( void const* data, long size, Music_Emu** out, int sample_rate )
+{
+	require( (data || !size) && out );
+	*out = 0;
+
+	// The type is sniffed from the first four bytes, so shorter inputs can never match
+	gme_type_t const detected = (size >= 4) ? gme_identify_extension( gme_identify_header( data ) ) : 0;
+	if ( !detected )
+		return gme_wrong_file_type;
+
+	Music_Emu* const player = gme_new_emu( detected, sample_rate );
+	CHECK_ALLOC( player );
+
+	gme_err_t const status = gme_load_data( player, data, size );
+	if ( status )
+	{
+		delete player; // load failed: caller gets the error and *out stays null
+		return status;
+	}
+
+	*out = player;
+	return status;
+}
+
+BLARGG_EXPORT gme_err_t gme_open_file( const char* path, Music_Emu** out, int sample_rate ) // on success *out receives a new emulator loaded from path; on error *out is 0
+{
+	require( path && out );
+	*out = 0;
+
+	GME_FILE_READER in;
+	RETURN_ERR( in.open( path ) );
+
+	char header [4];
+	int header_size = 0; // stays 0 when the extension is enough and no header bytes were consumed
+
+	gme_type_t file_type = gme_identify_extension( path );
+	if ( !file_type )
+	{
+		header_size = sizeof header;
+		RETURN_ERR( in.read( header, sizeof header ) );
+		file_type = gme_identify_extension( gme_identify_header( header ) );
+	}
+	if ( !file_type )
+		return gme_wrong_file_type;
+
+	Music_Emu* emu = gme_new_emu( file_type, sample_rate );
+	CHECK_ALLOC( emu );
+
+	// optimization: avoids seeking/re-reading header
+	Remaining_Reader rem( header, header_size, &in ); // hands over the header_size bytes already read, then the rest of in
+	gme_err_t err = emu->load( rem );
+	in.close();
+
+	if ( err )
+		delete emu; // load failed: *out keeps the 0 set at the top
+	else
+		*out = emu;
+
+	return err;
+}
+
+BLARGG_EXPORT void gme_set_autoload_playback_limit( Music_Emu *emu, int do_autoload_limit )
+{
+	emu->set_autoload_playback_limit( do_autoload_limit != 0 );
+}
+
+BLARGG_EXPORT int gme_autoload_playback_limit( Music_Emu *const emu )
+{
+	return emu->autoload_playback_limit();
+}
+
+// Used to implement gme_new_emu and gme_new_emu_multi_channel
+Music_Emu* gme_internal_new_emu_( gme_type_t type, int rate, bool multi_channel ) // returns the new object, or 0 if type is null or any setup step fails
+{
+	if ( type )
+	{
+		if ( rate == gme_info_only )
+			return type->new_info(); // info-only objects skip the buffer and sample-rate setup below
+
+		Music_Emu* me = type->new_emu();
+		if ( me )
+		{
+		#if !GME_DISABLE_STEREO_DEPTH
+			me->set_multi_channel( multi_channel );
+
+			if ( type->flags_ & 1 ) // types with flag bit 0 set get their own Effects_Buffer
+			{
+				if ( me->multi_channel() )
+				{
+					me->effects_buffer = BLARGG_NEW Effects_Buffer(8);
+				}
+				else
+				{
+					me->effects_buffer = BLARGG_NEW Effects_Buffer(1);
+				}
+				if ( me->effects_buffer )
+					me->set_buffer( me->effects_buffer );
+			}
+
+			if ( !(type->flags_ & 1) || me->effects_buffer ) // continue unless the buffer allocation above failed
+		#endif
+			{
+				if ( !me->set_sample_rate( rate ) ) // a false (null) result is treated as success
+				{
+					check( me->type() == type );
+					return me;
+				}
+			}
+			delete me; // reached only when buffer or sample-rate setup failed
+		}
+	}
+	return 0;
+}
+
+BLARGG_EXPORT Music_Emu* gme_new_emu( gme_type_t type, int rate )
+{
+    return gme_internal_new_emu_( type, rate, false /* no multichannel */);
+}
+
+BLARGG_EXPORT Music_Emu* gme_new_emu_multi_channel( gme_type_t type, int rate )
+{
+    // multi-channel emulator (if possible, not all emu types support multi-channel)
+    return gme_internal_new_emu_( type, rate, true /* multichannel */);
+}
+
+BLARGG_EXPORT gme_err_t gme_load_file( Music_Emu* me, const char* path ) { return me->load_file( path ); }
+
+BLARGG_EXPORT gme_err_t gme_load_data( Music_Emu* me, void const* data, long size )
+{
+	Mem_File_Reader in( data, size );
+	return me->load( in );
+}
+
+BLARGG_EXPORT gme_err_t gme_load_custom( Music_Emu* me, gme_reader_t func, long size, void* data )
+{
+	Callback_Reader in( func, size, data );
+	return me->load( in );
+}
+
+BLARGG_EXPORT void gme_delete( Music_Emu* me ) { delete me; }
+
+BLARGG_EXPORT gme_type_t gme_type( Music_Emu const* me ) { return me->type(); }
+
+BLARGG_EXPORT const char* gme_warning( Music_Emu* me ) { return me->warning(); }
+
+BLARGG_EXPORT int gme_track_count( Music_Emu const* me ) { return me->track_count(); }
+
+struct gme_info_t_ : gme_info_t
+{
+	track_info_t info;
+
+	BLARGG_DISABLE_NOTHROW
+};
+
+BLARGG_EXPORT gme_err_t gme_track_info( Music_Emu const* me, gme_info_t** out, int track ) // on success *out points to a new record to release with gme_free_info(); on error *out is NULL
+{
+	*out = NULL;
+
+	gme_info_t_* info = BLARGG_NEW gme_info_t_;
+	CHECK_ALLOC( info );
+
+	gme_err_t err = me->track_info( &info->info, track );
+	if ( err )
+	{
+		gme_free_info( info );
+		return err;
+	}
+
+	#define COPY(name) info->name = info->info.name; // public field <- same-named field of the embedded track_info_t
+
+	COPY( length );
+	COPY( intro_length );
+	COPY( loop_length );
+
+	info->i4  = -1; // i4..i15 are the reserved int fields; all are set to -1
+	info->i5  = -1;
+	info->i6  = -1;
+	info->i7  = -1;
+	info->i8  = -1;
+	info->i9  = -1;
+	info->i10 = -1;
+	info->i11 = -1;
+	info->i12 = -1;
+	info->i13 = -1;
+	info->i14 = -1;
+	info->i15 = -1;
+
+	info->s7  = ""; // s7..s15 are the reserved string fields; all are set to ""
+	info->s8  = "";
+	info->s9  = "";
+	info->s10 = "";
+	info->s11 = "";
+	info->s12 = "";
+	info->s13 = "";
+	info->s14 = "";
+	info->s15 = "";
+
+	COPY( system ); // string fields point into info->info, which lives in the same allocation
+	COPY( game );
+	COPY( song );
+	COPY( author );
+	COPY( copyright );
+	COPY( comment );
+	COPY( dumper );
+
+	#undef COPY
+
+	info->play_length = info->length;
+	if ( info->play_length <= 0 ) // no usable total length: derive one
+	{
+		info->play_length = info->intro_length + 2 * info->loop_length; // intro + 2 loops
+		if ( info->play_length <= 0 )
+			info->play_length = 150 * 1000; // 2.5 minutes
+	}
+
+	*out = info;
+
+	return 0;
+}
+
+BLARGG_EXPORT void gme_free_info( gme_info_t* info )
+{
+	delete STATIC_CAST(gme_info_t_*,info);
+}
+
+BLARGG_EXPORT void gme_set_stereo_depth( Music_Emu* me, double depth )
+{
+#if !GME_DISABLE_STEREO_DEPTH
+	if ( me->effects_buffer )
+		STATIC_CAST(Effects_Buffer*,me->effects_buffer)->set_depth( depth );
+#endif
+}
+
+BLARGG_EXPORT void*     gme_user_data      ( Music_Emu const* me )                { return me->user_data(); }
+BLARGG_EXPORT void      gme_set_user_data  ( Music_Emu* me, void* new_user_data ) { me->set_user_data( new_user_data ); }
+BLARGG_EXPORT void      gme_set_user_cleanup(Music_Emu* me, gme_user_cleanup_t func ) { me->set_user_cleanup( func ); }
+
+BLARGG_EXPORT gme_err_t gme_start_track    ( Music_Emu* me, int index )           { return me->start_track( index ); }
+BLARGG_EXPORT gme_err_t gme_play           ( Music_Emu* me, int n, short* p )     { return me->play( n, p ); }
+BLARGG_EXPORT void      gme_set_fade       ( Music_Emu* me, int start_msec )      { me->set_fade( start_msec ); }
+BLARGG_EXPORT int       gme_track_ended    ( Music_Emu const* me )                { return me->track_ended(); }
+BLARGG_EXPORT int       gme_tell           ( Music_Emu const* me )                { return me->tell(); }
+BLARGG_EXPORT int       gme_tell_samples   ( Music_Emu const* me )                { return me->tell_samples(); }
+BLARGG_EXPORT gme_err_t gme_seek           ( Music_Emu* me, int msec )            { return me->seek( msec ); }
+BLARGG_EXPORT gme_err_t gme_seek_samples   ( Music_Emu* me, int n )               { return me->seek_samples( n ); }
+BLARGG_EXPORT int       gme_voice_count    ( Music_Emu const* me )                { return me->voice_count(); }
+BLARGG_EXPORT void      gme_ignore_silence ( Music_Emu* me, int disable )         { me->ignore_silence( disable != 0 ); }
+BLARGG_EXPORT void      gme_set_tempo      ( Music_Emu* me, double t )            { me->set_tempo( t ); }
+BLARGG_EXPORT void      gme_mute_voice     ( Music_Emu* me, int index, int mute ) { me->mute_voice( index, mute != 0 ); }
+BLARGG_EXPORT void      gme_mute_voices    ( Music_Emu* me, int mask )            { me->mute_voices( mask ); }
+BLARGG_EXPORT void      gme_enable_accuracy( Music_Emu* me, int enabled )         { me->enable_accuracy( enabled ); }
+BLARGG_EXPORT void      gme_clear_playlist ( Music_Emu* me )                      { me->clear_playlist(); }
+BLARGG_EXPORT int       gme_type_multitrack( gme_type_t t )                       { return t->track_count != 1; }
+BLARGG_EXPORT int       gme_multi_channel  ( Music_Emu const* me )                { return me->multi_channel(); }
+
+BLARGG_EXPORT void      gme_set_equalizer  ( Music_Emu* me, gme_equalizer_t const* eq )
+{
+	Music_Emu::equalizer_t updated = me->equalizer(); // start from current settings; only treble and bass are replaced
+	updated.bass   = eq->bass;
+	updated.treble = eq->treble;
+	me->set_equalizer( updated );
+}
+
+BLARGG_EXPORT void gme_equalizer( Music_Emu const* me, gme_equalizer_t* out )
+{
+	// Value-initialize first so the reserved fields come back as zero
+	*out = gme_equalizer_t();
+	out->treble = me->equalizer().treble;
+	out->bass   = me->equalizer().bass;
+}
+
+BLARGG_EXPORT const char* gme_voice_name( Music_Emu const* me, int i )
+{
+	assert( (unsigned) i < (unsigned) me->voice_count() );
+	return me->voice_names() [i];
+}
+
+BLARGG_EXPORT const char* gme_type_system( gme_type_t type )
+{
+	assert( type );
+	return type->system;
+}
diff --git a/libraries/game-music-emu/gme/gme.h b/libraries/game-music-emu/gme/gme.h
new file mode 100644
index 000000000..80c6ce846
--- /dev/null
+++ b/libraries/game-music-emu/gme/gme.h
@@ -0,0 +1,267 @@
+/* Game music emulator library C interface (also usable from C++) */
+
+/* Game_Music_Emu 0.6.2 */
+#ifndef GME_H
+#define GME_H
+
+#ifdef __cplusplus
+	extern "C" {
+#endif
+
+#define GME_VERSION 0x000602 /* 1 byte major, 1 byte minor, 1 byte patch-level */
+
+/* Error string returned by library functions, or NULL if no error (success) */
+typedef const char* gme_err_t;
+
+/* First parameter of most gme_ functions is a pointer to the Music_Emu */
+typedef struct Music_Emu Music_Emu;
+
+
+/******** Basic operations ********/
+
+/* Create emulator and load game music file/data into it. Sets *out to new emulator. */
+gme_err_t gme_open_file( const char path [], Music_Emu** out, int sample_rate );
+
+/* Number of tracks available */
+int gme_track_count( Music_Emu const* );
+
+/* Start a track, where 0 is the first track */
+gme_err_t gme_start_track( Music_Emu*, int index );
+
+/* Generate 'count' 16-bit signed samples into 'out'. Output is in stereo. */
+gme_err_t gme_play( Music_Emu*, int count, short out [] );
+
+/* Finish using emulator and free memory */
+void gme_delete( Music_Emu* );
+
+
+/******** Track position/length ********/
+
+/* Set time to start fading track out. Once fade ends track_ended() returns true.
+Fade time can be changed while track is playing. */
+void gme_set_fade( Music_Emu*, int start_msec );
+
+/* True if a track has reached its end */
+int gme_track_ended( Music_Emu const* );
+
+/* Number of milliseconds (1000 = one second) played since beginning of track */
+int gme_tell( Music_Emu const* );
+
+/* Number of samples generated since beginning of track */
+int gme_tell_samples( Music_Emu const* );
+
+/* Seek to new time in track. Seeking backwards or far forward can take a while. */
+gme_err_t gme_seek( Music_Emu*, int msec );
+
+/* Equivalent to restarting track then skipping n samples */
+gme_err_t gme_seek_samples( Music_Emu*, int n );
+
+
+/******** Informational ********/
+
+/* If you only need track information from a music file, pass gme_info_only for
+sample_rate to open/load. */
+enum { gme_info_only = -1 };
+
+/* Most recent warning string, or NULL if none. Clears current warning after returning.
+Warning is also cleared when loading a file and starting a track. */
+const char* gme_warning( Music_Emu* );
+
+/* Load m3u playlist file (must be done after loading music) */
+gme_err_t gme_load_m3u( Music_Emu*, const char path [] );
+
+/* Clear any loaded m3u playlist and any internal playlist that the music format
+supports (NSFE for example). */
+void gme_clear_playlist( Music_Emu* );
+
+/* Gets information for a particular track (length, name, author, etc.).
+Must be freed after use. */
+typedef struct gme_info_t gme_info_t;
+gme_err_t gme_track_info( Music_Emu const*, gme_info_t** out, int track );
+
+/* Frees track information */
+void gme_free_info( gme_info_t* );
+
+struct gme_info_t
+{
+	/* times in milliseconds; -1 if unknown */
+	int length;			/* total length, if file specifies it */
+	int intro_length;	/* length of song up to looping section */
+	int loop_length;	/* length of looping section */
+	
+	/* Length if available, otherwise intro_length+loop_length*2 if available,
+	otherwise a default of 150000 (2.5 minutes). */
+	int play_length;
+	
+	int i4,i5,i6,i7,i8,i9,i10,i11,i12,i13,i14,i15; /* reserved */
+	
+	/* empty string ("") if not available */
+	const char* system;
+	const char* game;
+	const char* song;
+	const char* author;
+	const char* copyright;
+	const char* comment;
+	const char* dumper;
+	
+	const char *s7,*s8,*s9,*s10,*s11,*s12,*s13,*s14,*s15; /* reserved */
+};
+
+
+/******** Advanced playback ********/
+
+/* Adjust stereo echo depth, where 0.0 = off and 1.0 = maximum. Has no effect for
+GYM, SPC, and Sega Genesis VGM music */
+void gme_set_stereo_depth( Music_Emu*, double depth );
+
+/* Disable automatic end-of-track detection and skipping of silence at beginning
+if ignore is true */
+void gme_ignore_silence( Music_Emu*, int ignore );
+
+/* Adjust song tempo, where 1.0 = normal, 0.5 = half speed, 2.0 = double speed.
+Track length as returned by track_info() assumes a tempo of 1.0. */
+void gme_set_tempo( Music_Emu*, double tempo );
+
+/* Number of voices used by currently loaded file */
+int gme_voice_count( Music_Emu const* );
+
+/* Name of voice i, from 0 to gme_voice_count() - 1 */
+const char* gme_voice_name( Music_Emu const*, int i );
+
+/* Mute/unmute voice i, where voice 0 is first voice */
+void gme_mute_voice( Music_Emu*, int index, int mute );
+
+/* Set muting state of all voices at once using a bit mask, where -1 mutes all
+voices, 0 unmutes them all, 0x01 mutes just the first voice, etc. */
+void gme_mute_voices( Music_Emu*, int muting_mask );
+
+/* Frequency equalizer settings; gme.txt describes them in more detail. */
+/* Implementers: If modified, also adjust Music_Emu::make_equalizer as needed */
+typedef struct gme_equalizer_t
+{
+	/* Treble level: -50.0 = muffled, 0 = flat, +5.0 = extra-crisp */
+	double treble;
+	
+	/* Bass setting: 1 = full bass, 90 = average, 16000 = almost no bass */
+	double bass;
+	
+	/* Reserved */
+	double d2;
+	double d3;
+	double d4;
+	double d5;
+	double d6;
+	double d7;
+	double d8;
+	double d9;
+} gme_equalizer_t;
+
+/* Get current frequency equalizer parameters */
+void gme_equalizer( Music_Emu const*, gme_equalizer_t* out );
+
+/* Change frequency equalizer parameters */
+void gme_set_equalizer( Music_Emu*, gme_equalizer_t const* eq );
+
+/* Enables/disables most accurate sound emulation options */
+void gme_enable_accuracy( Music_Emu*, int enabled );
+
+
+/******** Game music types ********/
+
+/* Music file type identifier. Can also hold NULL. */
+typedef const struct gme_type_t_* gme_type_t;
+
+/* Emulator type constants for each supported file type */
+extern const gme_type_t
+	gme_ay_type,
+	gme_gbs_type,
+	gme_gym_type,
+	gme_hes_type,
+	gme_kss_type,
+	gme_nsf_type,
+	gme_nsfe_type,
+	gme_sap_type,
+	gme_spc_type,
+	gme_vgm_type,
+	gme_vgz_type;
+
+/* Type of this emulator */
+gme_type_t gme_type( Music_Emu const* );
+
+/* Pointer to array of all music types, with NULL entry at end. Allows a player linked
+to this library to support new music types without having to be updated. Takes no
+arguments; declared with (void) so that C callers see a full prototype rather than
+an unprototyped declaration. */
+gme_type_t const* gme_type_list( void );
+
+/* Name of game system for this music file type */
+const char* gme_type_system( gme_type_t );
+
+/* True if this music file type supports multiple tracks */
+int gme_type_multitrack( gme_type_t );
+
+/* Nonzero if the PCM output retrieved by gme_play() has all 8 voices rendered to their own
+ * individual stereo channels; zero if the voices are mixed into one single stereo channel.
+ * @since 0.6.2 */
+int gme_multi_channel( Music_Emu const* );
+
+/******** Advanced file loading ********/
+
+/* Error returned if file type is not supported */
+extern const char* const gme_wrong_file_type;
+
+/* Same as gme_open_file(), but uses file data already in memory. Makes copy of data.
+ * The resulting Music_Emu object will be set to single channel mode. */
+gme_err_t gme_open_data( void const* data, long size, Music_Emu** out, int sample_rate );
+
+/* Determine likely game music type based on first four bytes of file. Returns
+string containing proper file suffix (i.e. "NSF", "SPC", etc.) or "" if
+file header is not recognized. */
+const char* gme_identify_header( void const* header );
+
+/* Get corresponding music type for file path or extension passed in. */
+gme_type_t gme_identify_extension( const char path_or_extension [] );
+
+/**
+ * Get typical file extension for a given music type.  This is not a replacement
+ * for a file content identification library (but see gme_identify_header).
+ *
+ * @since 0.6.2
+ */
+const char* gme_type_extension( gme_type_t music_type );
+
+/* Determine file type based on file's extension or header (if extension isn't recognized).
+Sets *type_out to type, or 0 if unrecognized or error. */
+gme_err_t gme_identify_file( const char path [], gme_type_t* type_out );
+
+/* Create new emulator and set sample rate. Returns NULL if out of memory. If you only need
+track information, pass gme_info_only for sample_rate. */
+Music_Emu* gme_new_emu( gme_type_t, int sample_rate );
+
+/* Create new multichannel emulator and set sample rate. Returns NULL if out of memory.
+ * If you only need track information, pass gme_info_only for sample_rate.
+ * (see gme_multi_channel for more information on multichannel support)
+ * @since 0.6.2
+ */
+Music_Emu* gme_new_emu_multi_channel( gme_type_t, int sample_rate );
+
+/* Load music file into emulator */
+gme_err_t gme_load_file( Music_Emu*, const char path [] );
+
+/* Load music file from memory into emulator. Makes a copy of data passed. */
+gme_err_t gme_load_data( Music_Emu*, void const* data, long size );
+
+/* Load music file using custom data reader function that will be called to
+read file data. Most emulators load the entire file in one read call. */
+typedef gme_err_t (*gme_reader_t)( void* your_data, void* out, int count );
+gme_err_t gme_load_custom( Music_Emu*, gme_reader_t, long file_size, void* your_data );
+
+/* Load m3u playlist file from memory (must be done after loading music) */
+gme_err_t gme_load_m3u_data( Music_Emu*, void const* data, long size );
+
+
+/******** User data ********/
+
+/* Set/get pointer to data you want to associate with this emulator.
+You can use this for whatever you want. */
+void  gme_set_user_data( Music_Emu*, void* new_user_data );
+void* gme_user_data( Music_Emu const* );
+
+/* Register cleanup function to be called when deleting emulator, or NULL to
+clear it. Passes user_data to cleanup function. */
+typedef void (*gme_user_cleanup_t)( void* user_data );
+void gme_set_user_cleanup( Music_Emu*, gme_user_cleanup_t func );
+
+
+#ifdef __cplusplus
+	}
+#endif
+
+#endif
diff --git a/libraries/game-music-emu/gme/gme_types.h b/libraries/game-music-emu/gme/gme_types.h
new file mode 100644
index 000000000..06226f4aa
--- /dev/null
+++ b/libraries/game-music-emu/gme/gme_types.h
@@ -0,0 +1,21 @@
+#ifndef GME_TYPES_H
+#define GME_TYPES_H
+
+/*
+ * This is a default gme_types.h for use when *not* using
+ * CMake.  If CMake is in use gme_types.h.in will be
+ * processed instead.
+ */
+#define USE_GME_AY
+#define USE_GME_GBS
+#define USE_GME_GYM
+#define USE_GME_HES
+#define USE_GME_KSS
+#define USE_GME_NSF
+#define USE_GME_NSFE
+#define USE_GME_SAP
+#define USE_GME_SPC
+/* VGM and VGZ are a package deal */
+#define USE_GME_VGM
+
+#endif /* GME_TYPES_H */
diff --git a/libraries/game-music-emu/gme/gme_types.h.in b/libraries/game-music-emu/gme/gme_types.h.in
new file mode 100644
index 000000000..4829b3e16
--- /dev/null
+++ b/libraries/game-music-emu/gme/gme_types.h.in
@@ -0,0 +1,23 @@
+#ifndef GME_TYPES_H
+#define GME_TYPES_H
+
+/* CMake will either #define each of the following (with no value) or
+ * comment it out, depending on the options passed to CMake.  This is
+ * used to conditionally compile in the various emulator types.
+ *
+ * See gme_type_list() in gme.cpp
+ */
+
+#cmakedefine USE_GME_AY
+#cmakedefine USE_GME_GBS
+#cmakedefine USE_GME_GYM
+#cmakedefine USE_GME_HES
+#cmakedefine USE_GME_KSS
+#cmakedefine USE_GME_NSF
+#cmakedefine USE_GME_NSFE
+#cmakedefine USE_GME_SAP
+#cmakedefine USE_GME_SPC
+/* VGM and VGZ are a package deal */
+#cmakedefine USE_GME_VGM
+
+#endif /* GME_TYPES_H */
diff --git a/libraries/game-music-emu/gme/hes_cpu_io.h b/libraries/game-music-emu/gme/hes_cpu_io.h
new file mode 100644
index 000000000..ce60ce8ef
--- /dev/null
+++ b/libraries/game-music-emu/gme/hes_cpu_io.h
@@ -0,0 +1,101 @@
+
+#include "Hes_Emu.h"
+
+#include "blargg_source.h"
+
+// Reads one byte from emulated address addr. Pages whose mmr entry is 0xFF are
+// serviced by cpu_read_(); every other page is read via cpu::get_code().
+int Hes_Emu::cpu_read( hes_addr_t addr )
+{
+	check( addr <= 0xFFFF );
+	
+	bool const handled_by_emu = (mmr [addr >> page_shift] == 0xFF);
+	return handled_by_emu ? cpu_read_( addr ) : *cpu::get_code( addr );
+}
+
+// Writes data to emulated address addr.
+//
+// If the page containing addr has a writable backing buffer (write_pages entry is
+// non-null), the byte is stored there. Otherwise, if that page's mmr entry is 0xFF,
+// the write is forwarded to cpu_write_() with the page-relative offset. Writes to
+// any other page are dropped.
+//
+// The page index is captured before addr is masked: looking it up afterwards
+// would always consult mmr [0] rather than the entry for the page being written.
+void Hes_Emu::cpu_write( hes_addr_t addr, int data )
+{
+	check( addr <= 0xFFFF );
+	
+	int const page = addr >> page_shift;
+	byte* out = write_pages [page];
+	addr &= page_size - 1; // page-relative offset from here on
+	if ( out )
+		out [addr] = data;
+	else if ( mmr [page] == 0xFF )
+		cpu_write_( addr, data );
+}
+
+// Maps bank into page and returns a pointer to the data now visible there.
+// write_pages [page] ends up pointing at the same data when the bank is one of
+// the writable ones (0xF8 -> cpu::ram, 0xF9-0xFB -> a slice of sgx), and is left
+// null otherwise. Banks below 0x80 come from rom; anything else resolves to
+// rom.unmapped(), with a debug message unless the bank is 0xFF.
+inline byte const* Hes_Emu::cpu_set_mmr( int page, int bank )
+{
+	// Start out non-writable; only the RAM-backed banks below change this
+	write_pages [page] = 0;
+	
+	if ( bank < 0x80 )
+		return rom.at_addr( bank * (blargg_long) page_size );
+	
+	byte* writable;
+	if ( bank == 0xF8 )
+	{
+		writable = cpu::ram;
+	}
+	else if ( bank >= 0xF9 && bank <= 0xFB )
+	{
+		writable = &sgx [(bank - 0xF9) * page_size];
+	}
+	else
+	{
+		if ( bank != 0xFF )
+			debug_printf( "Unmapped bank $%02X\n", bank );
+		return rom.unmapped();
+	}
+	
+	write_pages [page] = writable;
+	return writable;
+}
+
+// Fast-path read: casts cpu to Hes_Emu* and forwards to CPU_READ_FAST_.
+#define CPU_READ_FAST( cpu, addr, time, out ) \
+	CPU_READ_FAST_( STATIC_CAST(Hes_Emu*,cpu), addr, time, out )
+
+// Sets out to READ_PROG( addr ); if the mmr entry for the page containing addr is
+// 0xFF, out is instead taken from cpu->cpu_read_( addr ), bracketed by FLUSH_TIME()
+// and CACHE_TIME(). READ_PROG, FLUSH_TIME, CACHE_TIME, mmr and page_shift are not
+// defined in this file and must be in scope wherever the macro is expanded.
+// Note: addr is evaluated more than once, and time is not used by the expansion.
+#define CPU_READ_FAST_( cpu, addr, time, out ) \
+{\
+	out = READ_PROG( addr );\
+	if ( mmr [addr >> page_shift] == 0xFF )\
+	{\
+		FLUSH_TIME();\
+		out = cpu->cpu_read_( addr );\
+		CACHE_TIME();\
+	}\
+}
+
+// Fast-path write: casts cpu to Hes_Emu* and forwards to CPU_WRITE_FAST_.
+#define CPU_WRITE_FAST( cpu, addr, data, time ) \
+	CPU_WRITE_FAST_( STATIC_CAST(Hes_Emu*,cpu), addr, data, time )
+
+// Stores data at addr. If the page containing addr has a non-null write_pages
+// entry the byte is stored there directly; otherwise, if that page's mmr entry is
+// 0xFF, the write goes to cpu->cpu_write_() with the page-relative offset,
+// bracketed by FLUSH_TIME() and CACHE_TIME().
+// addr must be a modifiable lvalue: it is reduced to its page-relative offset as a
+// side effect. The page index is saved in page_ before that masking; testing
+// mmr [addr >> page_shift] afterwards would always examine mmr [0].
+// FLUSH_TIME, CACHE_TIME, mmr, page_shift and page_size are not defined in this
+// file and must be in scope wherever the macro is expanded. time is unused.
+#define CPU_WRITE_FAST_( cpu, addr, data, time ) \
+{\
+	int const page_ = addr >> page_shift;\
+	byte* out = cpu->write_pages [page_];\
+	addr &= page_size - 1;\
+	if ( out )\
+	{\
+		out [addr] = data;\
+	}\
+	else if ( mmr [page_] == 0xFF )\
+	{\
+		FLUSH_TIME();\
+		cpu->cpu_write_( addr, data );\
+		CACHE_TIME();\
+	}\
+}
+
+// The macros below each cast cpu to Hes_Emu* and call the matching member
+// function. Where a time parameter is present it is not used by the expansion.
+
+// Evaluates to the result of Hes_Emu::cpu_read( addr ).
+#define CPU_READ( cpu, addr, time ) \
+	STATIC_CAST(Hes_Emu*,cpu)->cpu_read( addr )
+
+// Calls Hes_Emu::cpu_write( addr, data ).
+#define CPU_WRITE( cpu, addr, data, time ) \
+	STATIC_CAST(Hes_Emu*,cpu)->cpu_write( addr, data )
+
+// Calls Hes_Emu::cpu_write_vdp( addr, data ).
+#define CPU_WRITE_VDP( cpu, addr, data, time ) \
+	STATIC_CAST(Hes_Emu*,cpu)->cpu_write_vdp( addr, data )
+
+// Evaluates to the pointer returned by Hes_Emu::cpu_set_mmr( page, bank ).
+#define CPU_SET_MMR( cpu, page, bank ) \
+	STATIC_CAST(Hes_Emu*,cpu)->cpu_set_mmr( page, bank )
+
+// Assigns the return value of Hes_Emu::cpu_done() to result_out.
+#define CPU_DONE( cpu, time, result_out ) \
+	result_out = STATIC_CAST(Hes_Emu*,cpu)->cpu_done()
diff --git a/libraries/game-music-emu/gme/libgme.pc.in b/libraries/game-music-emu/gme/libgme.pc.in
new file mode 100644
index 000000000..f057ce17c
--- /dev/null
+++ b/libraries/game-music-emu/gme/libgme.pc.in
@@ -0,0 +1,16 @@
+# entries wrapped in @...@ are expanded by CMake
+# ${foo} entries are left alone by CMake and much
+# later are used by pkg-config.
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+lib_suffix=@LIB_SUFFIX@
+libdir=${exec_prefix}/lib${lib_suffix}
+includedir=${prefix}/include
+
+Name: Game_Music_Emu
+Description: A video game emulation library for music.
+URL: https://bitbucket.org/mpyne/game-music-emu/wiki/Home
+Version: @GME_VERSION@
+Cflags: -I${includedir}
+Libs: -L${libdir} -lgme
+Libs.private: -lstdc++ @PKG_CONFIG_ZLIB@
diff --git a/libraries/game-music-emu/gme/nes_cpu_io.h b/libraries/game-music-emu/gme/nes_cpu_io.h
new file mode 100644
index 000000000..68ce9b6ff
--- /dev/null
+++ b/libraries/game-music-emu/gme/nes_cpu_io.h
@@ -0,0 +1,83 @@
+
+#include "Nsf_Emu.h"
+
+#if !NSF_EMU_APU_ONLY
+	#include "Nes_Namco_Apu.h"
+#endif
+
+#include "blargg_source.h"
+
+// Reads one byte from emulated address addr, checking each address region in
+// turn and returning as soon as one matches.
+int Nsf_Emu::cpu_read( nes_addr_t addr )
+{
+	// None of the 0xE000 bits set: low memory, indexed by the low 11 bits
+	if ( (addr & 0xE000) == 0 )
+		return cpu::low_mem [addr & 0x7FF];
+	
+	// 0x8000 and above: read through the CPU's code mapping
+	if ( addr > 0x7FFF )
+		return *cpu::get_code( addr );
+	
+	// 0x6000-0x7FFF: sram, indexed by addr masked to the array size
+	if ( addr > 0x5FFF )
+		return sram [addr & (sizeof sram - 1)];
+	
+	if ( addr == Nes_Apu::status_addr )
+		return apu.read_status( cpu::time() );
+	
+	#if !NSF_EMU_APU_ONLY
+		if ( addr == Nes_Namco_Apu::data_reg_addr && namco )
+			return namco->read_data();
+	#endif
+	
+	// Nothing claimed the address; 0x2002 is the one address not reported
+	if ( addr != 0x2002 )
+		debug_printf( "Read unmapped $%.4X\n", (unsigned) addr );
+	
+	return addr >> 8; // simulate open bus
+}
+
+// Writes data to emulated address addr. Regions are tested in this order: sram,
+// low memory, APU registers, bank-select registers. Anything left over is
+// handed to cpu_write_misc().
+void Nsf_Emu::cpu_write( nes_addr_t addr, int data )
+{
+	// sram: a single XOR/compare stands in for a range check
+	// (presumably relies on sram_addr being aligned to sizeof sram -- TODO confirm)
+	nes_addr_t const sram_offset = addr ^ sram_addr;
+	if ( sram_offset < sizeof sram )
+	{
+		sram [sram_offset] = data;
+		return;
+	}
+	
+	// Low memory: none of the 0xE000 bits set, indexed by the low 11 bits
+	if ( (addr & 0xE000) == 0 )
+	{
+		cpu::low_mem [addr & 0x7FF] = data;
+		return;
+	}
+	
+	// APU registers: the unsigned subtraction folds both range bounds into one compare
+	if ( unsigned (addr - Nes_Apu::start_addr) <= Nes_Apu::end_addr - Nes_Apu::start_addr )
+	{
+		GME_APU_HOOK( this, addr - Nes_Apu::start_addr, data );
+		apu.write_register( cpu::time(), addr, data );
+		return;
+	}
+	
+	// Bank-select registers: remap one bank_size window of code space
+	unsigned const slot = addr - bank_select_addr;
+	if ( slot < bank_count )
+	{
+		blargg_long const rom_offset = rom.mask_addr( data * (blargg_long) bank_size );
+		// An out-of-range bank only raises a warning; the mapping is still performed
+		if ( rom_offset >= rom.size() )
+			set_warning( "Invalid bank" );
+		cpu::map_code( (slot + 8) * bank_size, bank_size, rom.at_addr( rom_offset ) );
+		return;
+	}
+	
+	cpu_write_misc( addr, data );
+}
+
+// Memory-access hooks: each casts *cpu to Nsf_Emu& and calls the corresponding
+// Nsf_Emu member function. The time argument is not used by these expansions.
+#define CPU_READ( cpu, addr, time )         STATIC_CAST(Nsf_Emu&,*cpu).cpu_read( addr )
+#define CPU_WRITE( cpu, addr, data, time )  STATIC_CAST(Nsf_Emu&,*cpu).cpu_write( addr, data )
diff --git a/libraries/game-music-emu/gme/sap_cpu_io.h b/libraries/game-music-emu/gme/sap_cpu_io.h
new file mode 100644
index 000000000..d009d0d9b
--- /dev/null
+++ b/libraries/game-music-emu/gme/sap_cpu_io.h
@@ -0,0 +1,26 @@
+
+#include "Sap_Emu.h"
+
+#include "blargg_source.h"
+
+#define CPU_WRITE( cpu, addr, data, time )  STATIC_CAST(Sap_Emu&,*cpu).cpu_write( addr, data )
+
+// Stores data in mem.ram at addr. Writes whose address satisfies
+// (addr >> 8) == 0xD2 are additionally passed on to cpu_write_().
+void Sap_Emu::cpu_write( sap_addr_t addr, int data )
+{
+	bool const in_d2_page = (addr >> 8) == 0xD2;
+	
+	mem.ram [addr] = data;
+	if ( in_d2_page )
+		cpu_write_( addr, data );
+}
+
+// CPU_READ has two forms. With NDEBUG defined it expands directly to
+// READ_LOW( addr ), which is defined outside this file. Otherwise it calls
+// Sap_Emu::cpu_read(), which is only compiled in this configuration.
+#ifdef NDEBUG
+	#define CPU_READ( cpu, addr, time )     READ_LOW( addr )
+#else
+	#define CPU_READ( cpu, addr, time )     STATIC_CAST(Sap_Emu&,*cpu).cpu_read( addr )
+	
+	// Returns mem.ram [addr], first emitting a debug message when
+	// (addr & 0xF900) == 0xD000.
+	int Sap_Emu::cpu_read( sap_addr_t addr )
+	{
+		if ( (addr & 0xF900) == 0xD000 )
+			debug_printf( "Unmapped read $%04X\n", addr );
+		return mem.ram [addr];
+	}
+#endif
diff --git a/libraries/game-music-emu/license.txt b/libraries/game-music-emu/license.txt
new file mode 100644
index 000000000..5ab7695ab
--- /dev/null
+++ b/libraries/game-music-emu/license.txt
@@ -0,0 +1,504 @@
+		  GNU LESSER GENERAL PUBLIC LICENSE
+		       Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL.  It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+  When we speak of free software, we are referring to freedom of use,
+not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+  To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  To protect each distributor, we want to make it very clear that
+there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+  Finally, software patents pose a constant threat to the existence of
+any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+  Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+  When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+  We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less
+of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+
+  For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard.  To achieve this, non-free programs must be
+allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+  In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+  Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+		  GNU LESSER GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+  
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+  6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Use a suitable shared library mechanism for linking with the
+    Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system,
+    rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if
+    the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+
+    c) Accompany the work with a written offer, valid for at
+    least three years, to give the same user the materials
+    specified in Subsection 6a, above, for a charge no more
+    than the cost of performing this distribution.
+
+    d) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    e) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded.  In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+			    NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+           How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.  It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff --git a/libraries/game-music-emu/readme.txt b/libraries/game-music-emu/readme.txt
new file mode 100644
index 000000000..22cc20aad
--- /dev/null
+++ b/libraries/game-music-emu/readme.txt
@@ -0,0 +1,241 @@
+Game_Music_Emu 0.6.2: Game Music Emulators
+------------------------------------------
+Game_Music_Emu is a collection of video game music file emulators that
+support the following formats and systems:
+
+AY        ZX Spectrum/Amstrad CPC
+GBS       Nintendo Game Boy
+GYM       Sega Genesis/Mega Drive
+HES       NEC TurboGrafx-16/PC Engine
+KSS       MSX Home Computer/other Z80 systems (doesn't support FM sound)
+NSF/NSFE  Nintendo NES/Famicom (with VRC 6, Namco 106, and FME-7 sound)
+SAP       Atari systems using POKEY sound chip
+SPC       Super Nintendo/Super Famicom
+VGM/VGZ   Sega Master System/Mark III, Sega Genesis/Mega Drive, BBC Micro
+
+Features:
+* C interface for use in C, C++, and other compatible languages
+* High emphasis has been placed on making the library very easy to use
+* One set of common functions works with all emulators the same way
+* Several code examples, including a music player using SDL
+* Portable code for use on any system with modern or older C++ compilers
+* Adjustable output sample rate using quality band-limited resampling
+* Uniform access to text information fields and track timing information
+* End-of-track fading and automatic look ahead silence detection
+* Treble/bass and stereo echo for AY/GBS/HES/KSS/NSF/NSFE/SAP/VGM
+* Tempo can be adjusted and individual voices can be muted while playing
+* Can read music data from file, memory, or custom reader function/class
+* Can access track information without having to load into full emulator
+* M3U track listing support for multi-track formats
+* Modular design allows elimination of unneeded emulators/features
+
+This library has been used in game music players for Windows, Linux on
+several architectures, Mac OS, MorphOS, Xbox, PlayStation Portable,
+GP2X, and Nintendo DS.
+
+Author : Shay Green <gblargg@gmail.com>
+Website: https://bitbucket.org/mpyne/game-music-emu/wiki/Home
+License: GNU Lesser General Public License (LGPL)
+
+Note: If you use the MAME YM2612 emulator, the library's license becomes
+the GNU General Public License (GPL) v2.0+!
+
+Current Maintainer: Michael Pyne <mpyne@purinchu.net>
+
+Getting Started
+---------------
+Build a program consisting of demo/basics.c, demo/Wave_Writer.cpp, and
+all source files in gme/.
+
+Or, if you have CMake 2.6 or later, execute at a command prompt (from the
+extracted source directory):
+
+    mkdir build
+    cd build
+    cmake ../         # <-- Pass any needed CMake flags here
+    make              # To build the library
+    cd demo
+    make              # To build the demo itself
+
+Be sure "test.nsf" is in the same directory as the demo program. Running it
+should generate the recording "out.wav".
+
+You can use "make install" to install the library. To choose where to install
+the library to, use the CMake argument "-DCMAKE_INSTALL_PREFIX=/usr/local"
+(and replace /usr/local with the base path you wish to use). Alternately, you
+can specify the base path to install to when you run "make install" by passing
+'DESTDIR=/usr/local' on the make install command line (again, replace
+/usr/local as appropriate).
+
+To build a static library instead of shared (the default), pass
+-DBUILD_SHARED_LIBS=OFF to the cmake command when running cmake.
+
+A slightly more extensive demo application is available in the player/
+directory.  It requires SDL to build.
+
+Read gme.txt for more information. Post to the discussion forum for
+assistance.
+
+Files
+-----
+gme.txt               General notes about the library
+changes.txt           Changes made since previous releases
+design.txt            Library design notes
+license.txt           GNU Lesser General Public License
+CMakeLists.txt        CMake build rules
+
+test.nsf              Test file for NSF emulator
+test.m3u              Test m3u playlist for features.c demo
+
+demo/
+  basics.c            Records NSF file to wave sound file
+  features.c          Demonstrates many additional features
+  Wave_Writer.h       WAVE sound file writer used for demo output
+  Wave_Writer.cpp
+  CMakeLists.txt      CMake build rules
+
+player/               Player using the SDL multimedia library
+  player.cpp          Simple music player with waveform display
+  Music_Player.cpp    Stand alone player for background music
+  Music_Player.h
+  Audio_Scope.cpp     Audio waveform scope
+  Audio_Scope.h
+  CMakeLists.txt      CMake build rules
+
+gme/
+  blargg_config.h     Library configuration (modify this file as needed)
+
+  gme.h               Library interface header file
+  gme.cpp
+
+  Ay_Emu.h            ZX Spectrum AY emulator
+  Ay_Emu.cpp
+  Ay_Apu.cpp
+  Ay_Apu.h
+  Ay_Cpu.cpp
+  Ay_Cpu.h
+
+  Gbs_Emu.h           Nintendo Game Boy GBS emulator
+  Gbs_Emu.cpp
+  Gb_Apu.cpp
+  Gb_Apu.h
+  Gb_Cpu.cpp
+  Gb_Cpu.h
+  gb_cpu_io.h
+  Gb_Oscs.cpp
+  Gb_Oscs.h
+
+  Hes_Emu.h           TurboGrafx-16/PC Engine HES emulator
+  Hes_Apu.cpp
+  Hes_Apu.h
+  Hes_Cpu.cpp
+  Hes_Cpu.h
+  hes_cpu_io.h
+  Hes_Emu.cpp
+
+  Kss_Emu.h           MSX Home Computer/other Z80 systems KSS emulator
+  Kss_Emu.cpp
+  Kss_Cpu.cpp
+  Kss_Cpu.h
+  Kss_Scc_Apu.cpp
+  Kss_Scc_Apu.h
+  Ay_Apu.h
+  Ay_Apu.cpp
+  Sms_Apu.h
+  Sms_Apu.cpp
+  Sms_Oscs.h
+
+  Nsf_Emu.h           Nintendo NES NSF/NSFE emulator
+  Nsf_Emu.cpp
+  Nes_Apu.cpp
+  Nes_Apu.h
+  Nes_Cpu.cpp
+  Nes_Cpu.h
+  nes_cpu_io.h
+  Nes_Oscs.cpp
+  Nes_Oscs.h
+  Nes_Fme7_Apu.cpp
+  Nes_Fme7_Apu.h
+  Nes_Namco_Apu.cpp
+  Nes_Namco_Apu.h
+  Nes_Vrc6_Apu.cpp
+  Nes_Vrc6_Apu.h
+  Nsfe_Emu.h          NSFE support
+  Nsfe_Emu.cpp
+
+  Spc_Emu.h           Super Nintendo SPC emulator
+  Spc_Emu.cpp
+  Snes_Spc.cpp
+  Snes_Spc.h
+  Spc_Cpu.cpp
+  Spc_Cpu.h
+  Spc_Dsp.cpp
+  Spc_Dsp.h
+  Fir_Resampler.cpp
+  Fir_Resampler.h
+
+  Sap_Emu.h           Atari SAP emulator
+  Sap_Emu.cpp
+  Sap_Apu.cpp
+  Sap_Apu.h
+  Sap_Cpu.cpp
+  Sap_Cpu.h
+  sap_cpu_io.h
+
+  Vgm_Emu.h           Sega VGM emulator
+  Vgm_Emu_Impl.cpp
+  Vgm_Emu_Impl.h
+  Vgm_Emu.cpp
+  Ym2413_Emu.cpp
+  Ym2413_Emu.h
+  Gym_Emu.h           Sega Genesis GYM emulator
+  Gym_Emu.cpp
+  Sms_Apu.cpp         Common Sega emulator files
+  Sms_Apu.h
+  Sms_Oscs.h
+  Ym2612_Emu.h
+  Ym2612_GENS.cpp     GENS 2.10 YM2612 emulator (LGPLv2.1+ license)
+  Ym2612_GENS.h
+  Ym2612_MAME.cpp     MAME YM2612 emulator (GPLv2.0+ license)
+  Ym2612_MAME.h
+  Ym2612_Nuked.cpp    Nuked OPN2 emulator (LGPLv2.1+ license)
+  Ym2612_Nuked.h
+  Dual_Resampler.cpp
+  Dual_Resampler.h
+  Fir_Resampler.cpp
+  Fir_Resampler.h
+
+  M3u_Playlist.h      M3U playlist support
+  M3u_Playlist.cpp
+
+  Effects_Buffer.h    Sound buffer with stereo echo and panning
+  Effects_Buffer.cpp
+
+  blargg_common.h     Common files needed by all emulators
+  blargg_endian.h
+  blargg_source.h
+  Blip_Buffer.cpp
+  Blip_Buffer.h
+  Gme_File.h
+  Gme_File.cpp
+  Music_Emu.h
+  Music_Emu.cpp
+  Classic_Emu.h
+  Classic_Emu.cpp
+  Multi_Buffer.h
+  Multi_Buffer.cpp
+  Data_Reader.h
+  Data_Reader.cpp
+
+  CMakeLists.txt      CMake build rules
+
+
+Legal
+-----
+Game_Music_Emu library copyright (C) 2003-2009 Shay Green.
+Sega Genesis YM2612 emulator copyright (C) 2002 Stephane Dallongeville.
+MAME YM2612 emulator copyright (C) 2003 Jarek Burczynski, Tatsuyuki Satoh
+Nuked OPN2 emulator copyright (C) 2017 Alexey Khokholov (Nuke.YKT)
+
+--
+Shay Green <gblargg@gmail.com>
diff --git a/libraries/gdtoa/CMakeLists.txt b/libraries/gdtoa/CMakeLists.txt
new file mode 100644
index 000000000..485f3778b
--- /dev/null
+++ b/libraries/gdtoa/CMakeLists.txt
@@ -0,0 +1,45 @@
+cmake_minimum_required( VERSION 2.8.7 )
+
+set( CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -D_DEBUG" ) # define _DEBUG for Debug-configuration C builds
+
+# Disable warnings for << operator precedence (4554) and
+# unreferenced labels (4102) from VC
+if( MSVC )
+	set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4554 /wd4102" )
+endif()
+
+if( ZD_CMAKE_COMPILER_IS_GNUC_COMPATIBLE ) # not defined in this file; presumably set by the parent project
+	set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra" )
+endif()
+
+include_directories( ${CMAKE_CURRENT_BINARY_DIR} ) # arith.h and gd_qnan.h are generated into this directory
+add_definitions( -DINFNAN_CHECK -DMULTIPLE_THREADS ) # both macros are described in this directory's README
+
+if( NOT MSVC AND NOT APPLE ) # header generation is skipped for MSVC and Apple builds
+	if( NOT CMAKE_CROSSCOMPILING ) # when cross-compiling, no arithchk target is created here
+		add_executable( arithchk arithchk.c )
+	endif()
+	add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/arith.h # arith.h is arithchk's captured stdout
+		COMMAND arithchk >${CMAKE_CURRENT_BINARY_DIR}/arith.h
+		DEPENDS arithchk )
+
+	if( NOT CMAKE_CROSSCOMPILING )
+		add_executable( qnan qnan.c arith.h ) # arith.h listed as a source, presumably so it is generated before qnan builds
+		set( CROSS_EXPORTS ${CROSS_EXPORTS} arithchk qnan PARENT_SCOPE ) # append both tools to the parent scope's CROSS_EXPORTS
+	endif()
+	add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/gd_qnan.h # gd_qnan.h is qnan's captured stdout
+		COMMAND qnan >${CMAKE_CURRENT_BINARY_DIR}/gd_qnan.h
+		DEPENDS qnan )
+		
+	set( GEN_FP_FILES arith.h gd_qnan.h ) # added to the gdtoa library's source list below
+	set( GEN_FP_DEPS ${CMAKE_CURRENT_BINARY_DIR}/arith.h ${CMAKE_CURRENT_BINARY_DIR}/gd_qnan.h ) # NOTE(review): not referenced again in this file
+endif()
+
+add_library( gdtoa STATIC
+	${GEN_FP_FILES}
+	dmisc.c
+	dtoa.c
+	misc.c
+	)
+target_link_libraries( gdtoa ) # no libraries are listed here
+
diff --git a/libraries/gdtoa/README b/libraries/gdtoa/README
new file mode 100644
index 000000000..1bf7d91e4
--- /dev/null
+++ b/libraries/gdtoa/README
@@ -0,0 +1,400 @@
+This directory contains source for a library of binary -> decimal
+and decimal -> binary conversion routines, for single-, double-,
+and extended-precision IEEE binary floating-point arithmetic, and
+other IEEE-like binary floating-point, including "double double",
+as in
+
+	T. J. Dekker, "A Floating-Point Technique for Extending the
+	Available Precision", Numer. Math. 18 (1971), pp. 224-242
+
+and
+
+	"Inside Macintosh: PowerPC Numerics", Addison-Wesley, 1994
+
+The conversion routines use double-precision floating-point arithmetic
+and, where necessary, high precision integer arithmetic.  The routines
+are generalizations of the strtod and dtoa routines described in
+
+	David M. Gay, "Correctly Rounded Binary-Decimal and
+	Decimal-Binary Conversions", Numerical Analysis Manuscript
+	No. 90-10, Bell Labs, Murray Hill, 1990;
+	http://cm.bell-labs.com/cm/cs/what/ampl/REFS/rounding.ps.gz
+
+(based in part on papers by Clinger and Steele & White: see the
+references in the above paper).
+
+The present conversion routines should be able to use any of IEEE binary,
+VAX, or IBM-mainframe double-precision arithmetic internally, but I (dmg)
+have so far only had a chance to test them with IEEE double precision
+arithmetic.
+
+The core conversion routines are strtodg for decimal -> binary conversions
+and gdtoa for binary -> decimal conversions.  These routines operate
+on arrays of unsigned 32-bit integers of type ULong, a signed 32-bit
+exponent of type Long, and arithmetic characteristics described in
+struct FPI; FPI, Long, and ULong are defined in gdtoa.h.  File arith.h
+is supposed to provide #defines that cause gdtoa.h to define its
+types correctly.  File arithchk.c is source for a program that
+generates a suitable arith.h on all systems where I've been able to
+test it.
+
+The core conversion routines are meant to be called by helper routines
+that know details of the particular binary arithmetic of interest and
+convert.  The present directory provides helper routines for 6 variants
+of IEEE binary floating-point arithmetic, each indicated by one or
+two letters:
+
+	f	IEEE single precision
+	d	IEEE double precision
+	x	IEEE extended precision, as on Intel 80x87
+		and software emulations of Motorola 68xxx chips
+		that do not pad the way the 68xxx does, but
+		only store 80 bits
+	xL	IEEE extended precision, as on Motorola 68xxx chips
+	Q	quad precision, as on Sun Sparc chips
+	dd	double double, pairs of IEEE double numbers
+		whose sum is the desired value
+
+For decimal -> binary conversions, there are three families of
+helper routines: one for round-nearest (or the current rounding
+mode on IEEE-arithmetic systems that provide the C99 fegetround()
+function, if compiled with -DHonor_FLT_ROUNDS):
+
+	strtof
+	strtod
+	strtodd
+	strtopd
+	strtopf
+	strtopx
+	strtopxL
+	strtopQ
+
+one with rounding direction specified:
+
+	strtorf
+	strtord
+	strtordd
+	strtorx
+	strtorxL
+	strtorQ
+
+and one for computing an interval (at most one bit wide) that contains
+the decimal number:
+
+	strtoIf
+	strtoId
+	strtoIdd
+	strtoIx
+	strtoIxL
+	strtoIQ
+
+The latter call strtoIg, which makes one call on strtodg and adjusts
+the result to provide the desired interval.  On systems where native
+arithmetic can easily make one-ulp adjustments on values in the
+desired floating-point format, it might be more efficient to use the
+native arithmetic.  Routine strtodI is a variant of strtoId that
+illustrates one way to do this for IEEE binary double-precision
+arithmetic -- but whether this is more efficient remains to be seen.
+
+Functions strtof and strtod have "natural" return types, float and
+double -- strtod is specified by the C standard, and strtof appears
+in the stdlib.h of some systems, such as (at least some) Linux systems.
+The other functions write their results to their final argument(s):
+to the final two arguments for the strtoI... (interval) functions,
+and to the final argument for the others (strtop... and strtor...).
+Where possible, these arguments have "natural" return types (double*
+or float*), to permit at least some type checking.  In reality, they
+are viewed as arrays of ULong (or, for the "x" functions, UShort)
+values. On systems where long double is the appropriate type, one can
+pass long double* final argument(s) to these routines.  The int value
+that these routines return is the return value from the call they make
+on strtodg; see the enum of possible return values in gdtoa.h.
+
+Source files g_ddfmt.c, misc.c, smisc.c, strtod.c, strtodg.c, and ulp.c
+should use true IEEE double arithmetic (not, e.g., double extended),
+at least for storing (and viewing the bits of) the variables declared
+"double" within them.
+
+One detail indicated in struct FPI is whether the target binary
+arithmetic departs from the IEEE standard by flushing denormalized
+numbers to 0.  On systems that do this, the helper routines for
+conversion to double-double format (when compiled with
+Sudden_Underflow #defined) penalize the bottom of the exponent
+range so that they return a nonzero result only when the least
+significant bit of the less significant member of the pair of
+double values returned can be expressed as a normalized double
+value.  An alternative would be to drop to 53-bit precision near
+the bottom of the exponent range.  To get correct rounding, this
+would (in general) require two calls on strtodg (one specifying
+126-bit arithmetic, then, if necessary, one specifying 53-bit
+arithmetic).
+
+By default, the core routine strtodg and strtod set errno to ERANGE
+if the result overflows to +Infinity or underflows to 0.  Compile
+these routines with NO_ERRNO #defined to inhibit errno assignments.
+
+Routine strtod is based on netlib's "dtoa.c from fp", and
+(f = strtod(s,se)) is more efficient for some conversions than, say,
+strtord(s,se,1,&f).  Parts of strtod require true IEEE double
+arithmetic with the default rounding mode (round-to-nearest) and, on
+systems with IEEE extended-precision registers, double-precision
+(53-bit) rounding precision.  If the machine uses (the equivalent of)
+Intel 80x87 arithmetic, the call
+	_control87(PC_53, MCW_PC);
+does this with many compilers.  Whether this or another call is
+appropriate depends on the compiler; for this to work, it may be
+necessary to #include "float.h" or another system-dependent header
+file.
+
+Source file strtodnrp.c gives a strtod that does not require 53-bit
+rounding precision on systems (such as Intel IA32 systems) that may
+suffer double rounding due to use of extended-precision registers.
+For some conversions this variant of strtod is less efficient than the
+one in strtod.c when the latter is run with 53-bit rounding precision.
+
+When float or double are involved, the values that the strto* routines
+return for NaNs are determined by gd_qnan.h, which the makefile
+generates by running the program whose source is qnan.c.  For other
+types, default NaN values are specified in g__fmt.c and may need
+adjusting.  Note that the rules for distinguishing signaling from
+quiet NaNs are system-dependent.  For cross-compilation, you need to
+determine arith.h and gd_qnan.h suitably, e.g., using the arithmetic
+of the target machine.
+
+C99's hexadecimal floating-point constants are recognized by the
+strto* routines (but this feature has not yet been heavily tested).
+Compiling with NO_HEX_FP #defined disables this feature.
+
+When compiled with -DINFNAN_CHECK, the strto* routines recognize C99's
+NaN and Infinity syntax.  Moreover, unless No_Hex_NaN is #defined, the
+strto* routines also recognize C99's NaN(...) syntax: they accept
+(case insensitively) strings of the form NaN(x), where x is a string
+of hexadecimal digits and spaces; if there is only one string of
+hexadecimal digits, it is taken for the fraction bits of the resulting
+NaN; if there are two or more strings of hexadecimal digits, each
+string is assigned to the next available sequence of 32-bit words of
+fraction bits (starting with the most significant), right-aligned in
+each sequence.  Strings of hexadecimal digits may be preceded by "0x"
+or "0X".
+
+For binary -> decimal conversions, I've provided a family of helper
+routines:
+
+	g_ffmt
+	g_dfmt
+	g_ddfmt
+	g_xfmt
+	g_xLfmt
+	g_Qfmt
+	g_ffmt_p
+	g_dfmt_p
+	g_ddfmt_p
+	g_xfmt_p
+	g_xLfmt_p
+	g_Qfmt_p
+
+which do a "%g" style conversion either to a specified number of decimal
+places (if their ndig argument is positive), or to the shortest
+decimal string that rounds to the given binary floating-point value
+(if ndig <= 0).  They write into a buffer supplied as an argument
+and return either a pointer to the end of the string (a null character)
+in the buffer, if the buffer was long enough, or 0.  Other forms of
+conversion are easily done with the help of gdtoa(), such as %e or %f
+style and conversions with direction of rounding specified (so that, if
+desired, the decimal value is either >= or <= the binary value).
+On IEEE-arithmetic systems that provide the C99 fegetround() function,
+if compiled with -DHonor_FLT_ROUNDS, these routines honor the current
+rounding mode.  For pedants, the ...fmt_p() routines are similar to the
+...fmt() routines, but have an additional final int argument, nik,
+that for conversions of Infinity or NaN, determines whether upper,
+lower, or mixed case is used, whether (...) is added to NaN values,
+and whether the sign of a NaN is reported or suppressed:
+
+	nik = ic + 6*(nb + 3*ns),
+
+where ic with 0 <= ic < 6 controls the rendering of Infinity and NaN:
+
+	0 ==> Infinity or NaN
+	1 ==> infinity or nan
+	2 ==> INFINITY or NAN
+	3 ==> Inf or NaN
+	4 ==> inf or nan
+	5 ==> INF or NAN
+
+nb with 0 <= nb < 3 determines whether NaN values are rendered
+as NaN(...):
+
+	0 ==> no
+	1 ==> yes
+	2 ==> no for default NaN values; yes otherwise
+
+ns = 0 or 1 determines whether the sign of NaN values is reported:
+
+	0 ==> distinguish NaN and -NaN
+	1 ==> report both as NaN
+
+For an example of more general conversions based on dtoa(), see
+netlib's "printf.c from ampl/solvers".
+
+For double-double -> decimal, g_ddfmt() assumes IEEE-like arithmetic
+of precision max(126, #bits(input)) bits, where #bits(input) is the
+number of mantissa bits needed to represent the sum of the two double
+values in the input.
+
+The makefile creates a library, gdtoa.a.  To use the helper
+routines, a program only needs to include gdtoa.h.  All the
+source files for gdtoa.a include a more extensive gdtoaimp.h;
+among other things, gdtoaimp.h has #defines that make "internal"
+names end in _D2A.  To make a "system" library, one could modify
+these #defines to make the names start with __.
+
+Various comments about possible #defines appear in gdtoaimp.h,
+but for most purposes, arith.h should set suitable #defines.
+
+Systems with preemptive scheduling of multiple threads require some
+manual intervention.  On such systems, it's necessary to compile
+dmisc.c, dtoa.c, gdtoa.c, and misc.c with MULTIPLE_THREADS #defined,
+and to provide (or suitably #define) two locks, acquired by
+ACQUIRE_DTOA_LOCK(n) and freed by FREE_DTOA_LOCK(n) for n = 0 or 1.
+(The second lock, accessed in pow5mult, ensures lazy evaluation of
+only one copy of high powers of 5; omitting this lock would introduce
+a small probability of wasting memory, but would otherwise be harmless.)
+Routines that call dtoa or gdtoa directly must also invoke freedtoa(s)
+to free the value s returned by dtoa or gdtoa.  It's OK to do so whether
+or not MULTIPLE_THREADS is #defined, and the helper g_*fmt routines
+listed above all do this indirectly (in gfmt_D2A(), which they all call).
+
+By default, there is a private pool of memory of length 2000 bytes
+for intermediate quantities, and MALLOC (see gdtoaimp.h) is called only
+if the private pool does not suffice.   2000 is large enough that MALLOC
+is called only under very unusual circumstances (decimal -> binary
+conversion of very long strings) for conversions to and from double
+precision.  For systems with preemptively scheduled multiple threads
+or for conversions to extended or quad, it may be appropriate to
+#define PRIVATE_MEM nnnn, where nnnn is a suitable value > 2000.
+For extended and quad precisions, -DPRIVATE_MEM=20000 is probably
+plenty even for many digits at the ends of the exponent range.
+Use of the private pool avoids some overhead.
+
+Directory test provides some test routines.  See its README.
+I've also tested this stuff (except double double conversions)
+with Vern Paxson's testbase program: see
+
+	V. Paxson and W. Kahan, "A Program for Testing IEEE Binary-Decimal
+	Conversion", manuscript, May 1991,
+	ftp://ftp.ee.lbl.gov/testbase-report.ps.Z .
+
+(The same ftp directory has source for testbase.)
+
+Some system-dependent additions to CFLAGS in the makefile:
+
+	HP-UX: -Aa -Ae
+	OSF (DEC Unix): -ieee_with_no_inexact
+	SunOS 4.1x: -DKR_headers -DBad_float_h
+
+If you want to put this stuff into a shared library and your
+operating system requires export lists for shared libraries,
+the following would be an appropriate export list:
+
+	dtoa
+	freedtoa
+	g_Qfmt
+	g_ddfmt
+	g_dfmt
+	g_ffmt
+	g_xLfmt
+	g_xfmt
+	gdtoa
+	strtoIQ
+	strtoId
+	strtoIdd
+	strtoIf
+	strtoIx
+	strtoIxL
+	strtod
+	strtodI
+	strtodg
+	strtof
+	strtopQ
+	strtopd
+	strtopdd
+	strtopf
+	strtopx
+	strtopxL
+	strtorQ
+	strtord
+	strtordd
+	strtorf
+	strtorx
+	strtorxL
+
+When time permits, I (dmg) hope to write in more detail about the
+present conversion routines; for now, this README file must suffice.
+Meanwhile, if you wish to write helper functions for other kinds of
+IEEE-like arithmetic, some explanation of struct FPI and the bits
+array may be helpful.  Both gdtoa and strtodg operate on a bits array
+described by FPI *fpi.  The bits array is of type ULong, a 32-bit
+unsigned integer type.  Floating-point numbers have fpi->nbits bits,
+with the least significant 32 bits in bits[0], the next 32 bits in
+bits[1], etc.  These numbers are regarded as integers multiplied by
+2^e (i.e., 2 to the power of the exponent e), where e is the second
+argument (be) to gdtoa and is stored in *exp by strtodg.  The minimum
+and maximum exponent values fpi->emin and fpi->emax for normalized
+floating-point numbers reflect this arrangement.  For example, the
+P754 standard for binary IEEE arithmetic specifies doubles as having
+53 bits, with normalized values of the form 1.xxxxx... times 2^(b-1023),
+with 52 bits (the x's) and the biased exponent b represented explicitly;
+b is an unsigned integer in the range 1 <= b <= 2046 for normalized
+finite doubles, b = 0 for denormals, and b = 2047 for Infinities and NaNs.
+To turn an IEEE double into the representation used by strtodg and gdtoa,
+we multiply 1.xxxx... by 2^52 (to make it an integer) and reduce the
+exponent e = (b-1023) by 52:
+
+	fpi->emin = 1 - 1023 - 52
+	fpi->emax = 2046 - 1023 - 52
+
+In various wrappers for IEEE double, we actually write -53 + 1 rather
+than -52, to emphasize that there are 53 bits including one implicit bit.
+Field fpi->rounding indicates the desired rounding direction, with
+possible values
+	FPI_Round_zero = toward 0,
+	FPI_Round_near = unbiased rounding -- the IEEE default,
+	FPI_Round_up = toward +Infinity, and
+	FPI_Round_down = toward -Infinity
+given in gdtoa.h.
+
+Field fpi->sudden_underflow indicates whether strtodg should return
+denormals or flush them to zero.  Normal floating-point numbers have
+bit fpi->nbits in the bits array on.  Denormals have it off, with
+exponent = fpi->emin.  Strtodg provides distinct return values for normals
+and denormals; see gdtoa.h.
+
+Compiling g__fmt.c, strtod.c, and strtodg.c with -DUSE_LOCALE causes
+the decimal-point character to be taken from the current locale; otherwise
+it is '.'.
+
+Source files dtoa.c and strtod.c in this directory are derived from
+netlib's "dtoa.c from fp" and are meant to function equivalently.
+When compiled with Honor_FLT_ROUNDS #defined (on systems that provide
+FLT_ROUNDS and fegetround() as specified in the C99 standard), they
+honor the current rounding mode.  Because FLT_ROUNDS is buggy on some
+(Linux) systems -- not reflecting calls on fesetround(), as the C99
+standard says it should -- when Honor_FLT_ROUNDS is #defined, the
+current rounding mode is obtained from fegetround() rather than from
+FLT_ROUNDS, unless Trust_FLT_ROUNDS is also #defined.
+
+Compile with -DUSE_LOCALE to use the current locale; otherwise
+decimal points are assumed to be '.'.  With -DUSE_LOCALE, unless
+you also compile with -DNO_LOCALE_CACHE, the details about the
+current "decimal point" character string are cached and assumed not
+to change during the program's execution.
+
+On machines with a 64-bit long double and perhaps a 113-bit "quad"
+type, you can invoke "make Printf" to add Printf (and variants, such
+as Fprintf) to gdtoa.a.  These are analogs, declared in stdio1.h, of
+printf and fprintf, etc. in which %La, %Le, %Lf, and %Lg are for long
+double and (if appropriate) %Lqa, %Lqe, %Lqf, and %Lqg are for quad
+precision printing.
+
+Please send comments to	David M. Gay (dmg at acm dot org, with " at "
+changed to "@" and " dot " changed to ".").
diff --git a/libraries/gdtoa/arithchk.c b/libraries/gdtoa/arithchk.c
new file mode 100644
index 000000000..ef6cda3db
--- /dev/null
+++ b/libraries/gdtoa/arithchk.c
@@ -0,0 +1,183 @@
+/****************************************************************
+Copyright (C) 1997, 1998 Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+****************************************************************/
+
+/* Try to deduce arith.h from arithmetic properties. */
+
+#include <stdio.h>
+
+ static int dalign;	/* set to 1 by Lcheck()/icheck(); main() then emits "#define Double_Align" */
+ typedef struct	/* one arithmetic kind that this program can report */
+Akind {
+	char *name;	/* macro name that main() writes after "#define " */
+	int   kind;	/* number that main() writes as Arith_Kind_ASL */
+	} Akind;
+
+ static Akind	/* one descriptor per kind; the probe functions return pointers to these */
+IEEE_8087	= { "IEEE_8087", 1 },
+IEEE_MC68k	= { "IEEE_MC68k", 2 },
+IBM		= { "IBM", 3 },
+VAX		= { "VAX", 4 },
+CRAY		= { "CRAY", 5};
+
+ static Akind *	/* Classify the double format by viewing 1e13 as two longs; returns 0 if no pattern matches. */
+Lcheck()
+{
+	union {
+		double d;
+		long L[2];
+		} u;	/* lets the same storage be read as a double or as two longs */
+	struct {
+		double d;
+		long L;
+		} x[2];	/* never accessed; only sizeof(x) is used, as a padding probe */
+
+	if (sizeof(x) > 2*(sizeof(double) + sizeof(long)))
+		dalign = 1;	/* the compiler padded the struct */
+	u.L[0] = u.L[1] = 0;
+	u.d = 1e13;	/* probe value whose word image is compared below */
+	if (u.L[0] == 1117925532 && u.L[1] == -448790528)
+		return &IEEE_MC68k;
+	if (u.L[1] == 1117925532 && u.L[0] == -448790528)
+		return &IEEE_8087;	/* same two words as IEEE_MC68k, in the opposite order */
+	if (u.L[0] == -2065213935 && u.L[1] == 10752)
+		return &VAX;
+	if (u.L[0] == 1267827943 && u.L[1] == 704643072)
+		return &IBM;
+	return 0;	/* unrecognized */
+	}
+
+ static Akind *	/* Same probe as Lcheck(), but viewing the double as two ints; returns 0 if no pattern matches. */
+icheck()
+{
+	union {
+		double d;
+		int L[2];
+		} u;	/* lets the same storage be read as a double or as two ints */
+	struct {
+		double d;
+		int L;
+		} x[2];	/* never accessed; only sizeof(x) is used, as a padding probe */
+
+	if (sizeof(x) > 2*(sizeof(double) + sizeof(int)))
+		dalign = 1;	/* the compiler padded the struct */
+	u.L[0] = u.L[1] = 0;
+	u.d = 1e13;	/* probe value whose word image is compared below */
+	if (u.L[0] == 1117925532 && u.L[1] == -448790528)
+		return &IEEE_MC68k;
+	if (u.L[1] == 1117925532 && u.L[0] == -448790528)
+		return &IEEE_8087;	/* same two words as IEEE_MC68k, in the opposite order */
+	if (u.L[0] == -2065213935 && u.L[1] == 10752)
+		return &VAX;
+	if (u.L[0] == 1267827943 && u.L[1] == 704643072)
+		return &IBM;
+	return 0;	/* unrecognized */
+	}
+
+char *emptyfmt = "";	/* avoid possible warning message with printf("") */
+
+ static Akind *	/* Cray probe: compare the single-long image of 1e13 with the constant noted below; returns 0 on mismatch. */
+ccheck()
+{
+	union {
+		double d;
+		long L;
+		} u;
+	long Cray1;
+
+	/* Cray1 = 4617762693716115456 -- without overflow on non-Crays */
+	Cray1 = printf("%s", emptyfmt) < 0 ? 0 : 4617762;	/* leading seven digits of the constant */
+	if (printf(emptyfmt, Cray1) >= 0)	/* NOTE(review): the printf() gating presumably keeps the constant from being folded at compile time -- confirm */
+		Cray1 = 1000000*Cray1 + 693716;	/* append the next six digits */
+	if (printf(emptyfmt, Cray1) >= 0)
+		Cray1 = 1000000*Cray1 + 115456;	/* append the final six digits */
+	u.d = 1e13;
+	if (u.L == Cray1)
+		return &CRAY;
+	return 0;
+	}
+
+ static int	/* Return 1 if squaring (.1 to the 155th power) computes to exactly 0, else 0. */
+fzcheck()
+{
+	double a, b;
+	int i;
+
+	a = 1.;
+	b = .1;
+	for(i = 155;; b *= b, i >>= 1) {	/* square-and-multiply: a ends up as .1 to the 155th power */
+		if (i & 1) {
+			a *= b;
+			if (i == 1)
+				break;	/* highest bit of the exponent consumed */
+			}
+		}
+	b = a * a;	/* roughly 1e-310; main() emits Sudden_Underflow when this is 0 */
+	return b == 0.;
+	}
+
+ int	/* Probe the host arithmetic and write the matching #defines; returns 0 if recognized, 1 otherwise. */
+main()
+{
+	Akind *a = 0;	/* stays 0 when no probe recognizes the arithmetic */
+	int Ldef = 0;	/* 1 when the int-based probe ran, so "Long" gets #defined as int below */
+	FILE *f;
+
+#ifdef WRITE_ARITH_H	/* for Symantec's buggy "make" */
+	f = fopen("arith.h", "w");
+	if (!f) {
+		printf("Cannot open arith.h\n");
+		return 1;
+		}
+#else
+	f = stdout;
+#endif
+
+	if (sizeof(double) == 2*sizeof(long))	/* pick the probe from the size of double relative to long and int */
+		a = Lcheck();
+	else if (sizeof(double) == 2*sizeof(int)) {
+		Ldef = 1;
+		a = icheck();
+		}
+	else if (sizeof(double) == sizeof(long))
+		a = ccheck();
+	if (a) {
+		fprintf(f, "#define %s\n#define Arith_Kind_ASL %d\n",
+			a->name, a->kind);
+		if (Ldef)
+			fprintf(f, "#define Long int\n#define Intcast (int)(long)\n");
+		if (dalign)
+			fprintf(f, "#define Double_Align\n");
+		if (sizeof(char*) == 8)
+			fprintf(f, "#define X64_bit_pointers\n");
+#ifndef NO_LONG_LONG
+		if (sizeof(long long) < 8)
+#endif
+			fprintf(f, "#define NO_LONG_LONG\n");	/* unconditional when this file is built with NO_LONG_LONG defined */
+		if (a->kind <= 2 && fzcheck())	/* kinds 1 and 2 are the two IEEE descriptors */
+			fprintf(f, "#define Sudden_Underflow\n");
+		return 0;
+		}
+	fprintf(f, "/* Unknown arithmetic */\n");
+	return 1;
+	}
diff --git a/libraries/gdtoa/dmisc.c b/libraries/gdtoa/dmisc.c
new file mode 100644
index 000000000..3e712511b
--- /dev/null
+++ b/libraries/gdtoa/dmisc.c
@@ -0,0 +1,216 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+#ifndef MULTIPLE_THREADS
+ char *dtoa_result;
+#endif
+
+ char *	/* Obtain a Balloc() block sized from the i-byte request and return a pointer one int past its start. */
+#ifdef KR_headers
+rv_alloc(i) int i;
+#else
+rv_alloc(int i)
+#endif
+{
+	int j, k, *r;
+
+	j = sizeof(ULong);
+	for(k = 0;	/* k counts how many times j must double before the test below fails */
+		sizeof(Bigint) - sizeof(ULong) - sizeof(int) + j <= (size_t)(i);
+		j <<= 1)
+			k++;
+	r = (int*)Balloc(k);
+	*r = k;	/* stash k in the first int; freedtoa() reads it back */
+	return
+#ifndef MULTIPLE_THREADS
+	dtoa_result =
+#endif
+		(char *)(r+1);	/* caller's buffer starts just after the stashed k */
+	}
+
+ char *	/* Copy the NUL-terminated string s into a fresh rv_alloc(n) buffer and return that buffer. */
+#ifdef KR_headers
+nrv_alloc(s, rve, n) char *s, **rve; int n;
+#else
+nrv_alloc(char *s, char **rve, int n)
+#endif
+{
+	char *rv, *t;
+
+	t = rv = rv_alloc(n);
+	while((*t = *s++) !=0)	/* copies up to and including the terminating NUL */
+		t++;
+	if (rve)
+		*rve = t;	/* t is left pointing at the copied NUL */
+	return rv;
+	}
+
+/* freedtoa(s) must be used to free values s returned by dtoa
+ * when MULTIPLE_THREADS is #defined.  It should be used in all cases,
+ * but for consistency with earlier versions of dtoa, it is optional
+ * when MULTIPLE_THREADS is not defined.
+ */
+
+ void	/* Hand a buffer obtained from rv_alloc()/nrv_alloc() back to Bfree(). */
+#ifdef KR_headers
+freedtoa(s) char *s;
+#else
+freedtoa(char *s)
+#endif
+{
+	Bigint *b = (Bigint *)((int *)s - 1);	/* step back over the int that rv_alloc() skipped */
+	b->maxwds = 1 << (b->k = *(int*)b);	/* read the stashed k back, then set maxwds to 2 to the k */
+	Bfree(b);
+#ifndef MULTIPLE_THREADS
+	if (s == dtoa_result)
+		dtoa_result = 0;	/* forget the cached pointer so dtoa() does not free it again */
+#endif
+	}
+
+ int	/* Subtract a multiple of S from b in place and return the multiplier q (0 if b has fewer words than S). */
+quorem
+#ifdef KR_headers
+	(b, S) Bigint *b, *S;
+#else
+	(Bigint *b, Bigint *S)
+#endif
+{
+	int n;
+	ULong *bx, *bxe, q, *sx, *sxe;
+#ifdef ULLong
+	ULLong borrow, carry, y, ys;
+#else
+	ULong borrow, carry, y, ys;
+#ifdef Pack_32
+	ULong si, z, zs;
+#endif
+#endif
+
+	n = S->wds;
+#ifdef DEBUG
+	/*debug*/ if (b->wds > n)
+	/*debug*/	Bug("oversize b in quorem");
+#endif
+	if (b->wds < n)
+		return 0;
+	sx = S->x;
+	sxe = sx + --n;	/* n becomes the index of the top word of S */
+	bx = b->x;
+	bxe = bx + n;
+	q = *bxe / (*sxe + 1);	/* ensure q <= true quotient */
+#ifdef DEBUG
+	/*debug*/ if (q > 9)
+	/*debug*/	Bug("oversized quotient in quorem");
+#endif
+	if (q) {	/* first pass: b -= q*S, one word at a time */
+		borrow = 0;
+		carry = 0;
+		do {
+#ifdef ULLong
+			ys = *sx++ * (ULLong)q + carry;
+			carry = ys >> 32;
+			y = *bx - (ys & 0xffffffffUL) - borrow;
+			borrow = y >> 32 & 1UL;
+			*bx++ = (ULong)(y & 0xffffffffUL);
+#else
+#ifdef Pack_32
+			si = *sx++;
+			ys = (si & 0xffff) * q + carry;
+			zs = (si >> 16) * q + (ys >> 16);
+			carry = zs >> 16;
+			y = (*bx & 0xffff) - (ys & 0xffff) - borrow;
+			borrow = (y & 0x10000) >> 16;
+			z = (*bx >> 16) - (zs & 0xffff) - borrow;
+			borrow = (z & 0x10000) >> 16;
+			Storeinc(bx, z, y);
+#else
+			ys = *sx++ * q + carry;
+			carry = ys >> 16;
+			y = *bx - (ys & 0xffff) - borrow;
+			borrow = (y & 0x10000) >> 16;
+			*bx++ = y & 0xffff;
+#endif
+#endif
+			}
+			while(sx <= sxe);
+		if (!*bxe) {	/* top word is now zero: shrink b->wds past leading zero words */
+			bx = b->x;
+			while(--bxe > bx && !*bxe)
+				--n;
+			b->wds = n;
+			}
+		}
+	if (cmp(b, S) >= 0) {	/* b is still >= S: bump q and subtract S once more */
+		q++;
+		borrow = 0;
+		carry = 0;
+		bx = b->x;
+		sx = S->x;
+		do {
+#ifdef ULLong
+			ys = *sx++ + carry;
+			carry = ys >> 32;
+			y = *bx - (ys & 0xffffffffUL) - borrow;
+			borrow = y >> 32 & 1UL;
+			*bx++ = (ULong)(y & 0xffffffffUL);
+#else
+#ifdef Pack_32
+			si = *sx++;
+			ys = (si & 0xffff) + carry;
+			zs = (si >> 16) + (ys >> 16);
+			carry = zs >> 16;
+			y = (*bx & 0xffff) - (ys & 0xffff) - borrow;
+			borrow = (y & 0x10000) >> 16;
+			z = (*bx >> 16) - (zs & 0xffff) - borrow;
+			borrow = (z & 0x10000) >> 16;
+			Storeinc(bx, z, y);
+#else
+			ys = *sx++ + carry;
+			carry = ys >> 16;
+			y = *bx - (ys & 0xffff) - borrow;
+			borrow = (y & 0x10000) >> 16;
+			*bx++ = y & 0xffff;
+#endif
+#endif
+			}
+			while(sx <= sxe);
+		bx = b->x;
+		bxe = bx + n;
+		if (!*bxe) {	/* same trimming of leading zero words as after the first pass */
+			while(--bxe > bx && !*bxe)
+				--n;
+			b->wds = n;
+			}
+		}
+	return q;
+	}
diff --git a/libraries/gdtoa/dtoa.c b/libraries/gdtoa/dtoa.c
new file mode 100644
index 000000000..c96e6a545
--- /dev/null
+++ b/libraries/gdtoa/dtoa.c
@@ -0,0 +1,780 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 1999 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+/* dtoa for IEEE arithmetic (dmg): convert double to ASCII string.
+ *
+ * Inspired by "How to Print Floating-Point Numbers Accurately" by
+ * Guy L. Steele, Jr. and Jon L. White [Proc. ACM SIGPLAN '90, pp. 112-126].
+ *
+ * Modifications:
+ *	1. Rather than iterating, we use a simple numeric overestimate
+ *	   to determine k = floor(log10(d)).  We scale relevant
+ *	   quantities using O(log2(k)) rather than O(k) multiplications.
+ *	2. For some modes > 2 (corresponding to ecvt and fcvt), we don't
+ *	   try to generate digits strictly left to right.  Instead, we
+ *	   compute with fewer bits and propagate the carry if necessary
+ *	   when rounding the final digit up.  This is often faster.
+ *	3. Under the assumption that input will be rounded nearest,
+ *	   mode 0 renders 1e23 as 1e23 rather than 9.999999999999999e22.
+ *	   That is, we allow equality in stopping tests when the
+ *	   round-nearest rule will give the same floating-point value
+ *	   as would satisfaction of the stopping test with strict
+ *	   inequality.
+ *	4. We remove common factors of powers of 2 from relevant
+ *	   quantities.
+ *	5. When converting floating-point integers less than 1e16,
+ *	   we use floating-point arithmetic rather than resorting
+ *	   to multiple-precision integers.
+ *	6. When asked to produce fewer than 15 digits, we first try
+ *	   to get by with floating-point arithmetic; we resort to
+ *	   multiple-precision integer arithmetic only if we cannot
+ *	   guarantee that the floating-point calculation has given
+ *	   the correctly rounded result.  For k requested digits and
+ *	   "uniformly" distributed input, the probability is
+ *	   something like 10^(k-15) that we must resort to the Long
+ *	   calculation.
+ */
+
+#ifdef Honor_FLT_ROUNDS
+#undef Check_FLT_ROUNDS
+#define Check_FLT_ROUNDS
+#else
+#define Rounding Flt_Rounds
+#endif
+
+ char *
+dtoa
+#ifdef KR_headers
+	(d0, mode, ndigits, decpt, sign, rve)
+	double d0; int mode, ndigits, *decpt, *sign; char **rve;
+#else
+	(double d0, int mode, int ndigits, int *decpt, int *sign, char **rve)
+#endif
+{
+ /*	Arguments ndigits, decpt, sign are similar to those
+	of ecvt and fcvt; trailing zeros are suppressed from
+	the returned string.  If not null, *rve is set to point
+	to the end of the return value.  If d is +-Infinity or NaN,
+	then *decpt is set to 9999.
+
+	mode:
+		0 ==> shortest string that yields d when read in
+			and rounded to nearest.
+		1 ==> like 0, but with Steele & White stopping rule;
+			e.g. with IEEE P754 arithmetic , mode 0 gives
+			1e23 whereas mode 1 gives 9.999999999999999e22.
+		2 ==> max(1,ndigits) significant digits.  This gives a
+			return value similar to that of ecvt, except
+			that trailing zeros are suppressed.
+		3 ==> through ndigits past the decimal point.  This
+			gives a return value similar to that from fcvt,
+			except that trailing zeros are suppressed, and
+			ndigits can be negative.
+		4,5 ==> similar to 2 and 3, respectively, but (in
+			round-nearest mode) with the tests of mode 0 to
+			possibly return a shorter string that rounds to d.
+			With IEEE arithmetic and compilation with
+			-DHonor_FLT_ROUNDS, modes 4 and 5 behave the same
+			as modes 2 and 3 when FLT_ROUNDS != 1.
+		6-9 ==> Debugging modes similar to mode - 4:  don't try
+			fast floating-point estimate (if applicable).
+
+		Values of mode other than 0-9 are treated as mode 0.
+
+		Sufficient space is allocated to the return value
+		to hold the suppressed trailing zeros.
+	*/
+
+	int bbits, b2, b5, be, dig, i, ieps, ilim, ilim0, ilim1,
+		j, j1, k, k0, k_check, leftright, m2, m5, s2, s5,
+		spec_case, try_quick;
+	Long L;
+#ifndef Sudden_Underflow
+	int denorm;
+	ULong x;
+#endif
+	Bigint *b, *b1, *delta, *mlo, *mhi, *S;
+	U d, d2, eps;
+	double ds;
+	char *s, *s0;
+#ifdef SET_INEXACT
+	int inexact, oldinexact;
+#endif
+#ifdef Honor_FLT_ROUNDS /*{*/
+	int Rounding;
+#ifdef Trust_FLT_ROUNDS /*{{ only define this if FLT_ROUNDS really works! */
+	Rounding = Flt_Rounds;
+#else /*}{*/
+	Rounding = 1;
+	switch(fegetround()) {
+	  case FE_TOWARDZERO:	Rounding = 0; break;
+	  case FE_UPWARD:	Rounding = 2; break;
+	  case FE_DOWNWARD:	Rounding = 3;
+	  }
+#endif /*}}*/
+#endif /*}*/
+
+#ifndef MULTIPLE_THREADS
+	if (dtoa_result) {
+		freedtoa(dtoa_result);
+		dtoa_result = 0;
+		}
+#endif
+	d.d = d0;
+	if (word0(&d) & Sign_bit) {
+		/* set sign for everything, including 0's and NaNs */
+		*sign = 1;
+		word0(&d) &= ~Sign_bit;	/* clear sign bit */
+		}
+	else
+		*sign = 0;
+
+#if defined(IEEE_Arith) + defined(VAX)
+#ifdef IEEE_Arith
+	if ((word0(&d) & Exp_mask) == Exp_mask)
+#else
+	if (word0(&d)  == 0x8000)
+#endif
+		{
+		/* Infinity or NaN */
+		*decpt = 9999;
+#ifdef IEEE_Arith
+		if (!word1(&d) && !(word0(&d) & 0xfffff))
+			return nrv_alloc("Infinity", rve, 8);
+#endif
+		return nrv_alloc("NaN", rve, 3);
+		}
+#endif
+#ifdef IBM
+	dval(&d) += 0; /* normalize */
+#endif
+	if (!dval(&d)) {
+		*decpt = 1;
+		return nrv_alloc("0", rve, 1);
+		}
+
+#ifdef SET_INEXACT
+	try_quick = oldinexact = get_inexact();
+	inexact = 1;
+#endif
+#ifdef Honor_FLT_ROUNDS
+	if (Rounding >= 2) {
+		if (*sign)
+			Rounding = Rounding == 2 ? 0 : 2;
+		else
+			if (Rounding != 2)
+				Rounding = 0;
+		}
+#endif
+
+	b = d2b(dval(&d), &be, &bbits);
+#ifdef Sudden_Underflow
+	i = (int)(word0(&d) >> Exp_shift1 & (Exp_mask>>Exp_shift1));
+#else
+	if (( i = (int)(word0(&d) >> Exp_shift1 & (Exp_mask>>Exp_shift1)) )!=0) {
+#endif
+		dval(&d2) = dval(&d);
+		word0(&d2) &= Frac_mask1;
+		word0(&d2) |= Exp_11;
+#ifdef IBM
+		if (( j = 11 - hi0bits(word0(&d2) & Frac_mask) )!=0)
+			dval(&d2) /= 1 << j;
+#endif
+
+		/* log(x)	~=~ log(1.5) + (x-1.5)/1.5
+		 * log10(x)	 =  log(x) / log(10)
+		 *		~=~ log(1.5)/log(10) + (x-1.5)/(1.5*log(10))
+		 * log10(&d) = (i-Bias)*log(2)/log(10) + log10(&d2)
+		 *
+		 * This suggests computing an approximation k to log10(&d) by
+		 *
+		 * k = (i - Bias)*0.301029995663981
+		 *	+ ( (d2-1.5)*0.289529654602168 + 0.176091259055681 );
+		 *
+		 * We want k to be too large rather than too small.
+		 * The error in the first-order Taylor series approximation
+		 * is in our favor, so we just round up the constant enough
+		 * to compensate for any error in the multiplication of
+		 * (i - Bias) by 0.301029995663981; since |i - Bias| <= 1077,
+		 * and 1077 * 0.30103 * 2^-52 ~=~ 7.2e-14,
+		 * adding 1e-13 to the constant term more than suffices.
+		 * Hence we adjust the constant term to 0.1760912590558.
+		 * (We could get a more accurate k by invoking log10,
+		 *  but this is probably not worthwhile.)
+		 */
+
+		i -= Bias;
+#ifdef IBM
+		i <<= 2;
+		i += j;
+#endif
+#ifndef Sudden_Underflow
+		denorm = 0;
+		}
+	else {
+		/* d is denormalized */
+
+		i = bbits + be + (Bias + (P-1) - 1);
+		x = i > 32  ? word0(&d) << (64 - i) | word1(&d) >> (i - 32)
+			    : word1(&d) << (32 - i);
+		dval(&d2) = x;
+		word0(&d2) -= 31*Exp_msk1; /* adjust exponent */
+		i -= (Bias + (P-1) - 1) + 1;
+		denorm = 1;
+		}
+#endif
+	ds = (dval(&d2)-1.5)*0.289529654602168 + 0.1760912590558 + i*0.301029995663981;
+	k = (int)ds;
+	if (ds < 0. && ds != k)
+		k--;	/* want k = floor(ds) */
+	k_check = 1;
+	if (k >= 0 && k <= Ten_pmax) {
+		if (dval(&d) < tens[k])
+			k--;
+		k_check = 0;
+		}
+	j = bbits - i - 1;
+	if (j >= 0) {
+		b2 = 0;
+		s2 = j;
+		}
+	else {
+		b2 = -j;
+		s2 = 0;
+		}
+	if (k >= 0) {
+		b5 = 0;
+		s5 = k;
+		s2 += k;
+		}
+	else {
+		b2 -= k;
+		b5 = -k;
+		s5 = 0;
+		}
+	if (mode < 0 || mode > 9)
+		mode = 0;
+
+#ifndef SET_INEXACT
+#ifdef Check_FLT_ROUNDS
+	try_quick = Rounding == 1;
+#else
+	try_quick = 1;
+#endif
+#endif /*SET_INEXACT*/
+
+	if (mode > 5) {
+		mode -= 4;
+		try_quick = 0;
+		}
+	leftright = 1;
+	ilim = ilim1 = -1;	/* Values for cases 0 and 1; done here to */
+				/* silence erroneous "gcc -Wall" warning. */
+	switch(mode) {
+		case 0:
+		case 1:
+			i = 18;
+			ndigits = 0;
+			break;
+		case 2:
+			leftright = 0;
+			/* no break */
+		case 4:
+			if (ndigits <= 0)
+				ndigits = 1;
+			ilim = ilim1 = i = ndigits;
+			break;
+		case 3:
+			leftright = 0;
+			/* no break */
+		case 5:
+			i = ndigits + k + 1;
+			ilim = i;
+			ilim1 = i - 1;
+			if (i <= 0)
+				i = 1;
+		}
+	s = s0 = rv_alloc(i);
+
+#ifdef Honor_FLT_ROUNDS
+	if (mode > 1 && Rounding != 1)
+		leftright = 0;
+#endif
+
+	if (ilim >= 0 && ilim <= Quick_max && try_quick) {
+
+		/* Try to get by with floating-point arithmetic. */
+
+		i = 0;
+		dval(&d2) = dval(&d);
+		k0 = k;
+		ilim0 = ilim;
+		ieps = 2; /* conservative */
+		if (k > 0) {
+			ds = tens[k&0xf];
+			j = k >> 4;
+			if (j & Bletch) {
+				/* prevent overflows */
+				j &= Bletch - 1;
+				dval(&d) /= bigtens[n_bigtens-1];
+				ieps++;
+				}
+			for(; j; j >>= 1, i++)
+				if (j & 1) {
+					ieps++;
+					ds *= bigtens[i];
+					}
+			dval(&d) /= ds;
+			}
+		else if (( j1 = -k )!=0) {
+			dval(&d) *= tens[j1 & 0xf];
+			for(j = j1 >> 4; j; j >>= 1, i++)
+				if (j & 1) {
+					ieps++;
+					dval(&d) *= bigtens[i];
+					}
+			}
+		if (k_check && dval(&d) < 1. && ilim > 0) {
+			if (ilim1 <= 0)
+				goto fast_failed;
+			ilim = ilim1;
+			k--;
+			dval(&d) *= 10.;
+			ieps++;
+			}
+		dval(&eps) = ieps*dval(&d) + 7.;
+		word0(&eps) -= (P-1)*Exp_msk1;
+		if (ilim == 0) {
+			S = mhi = 0;
+			dval(&d) -= 5.;
+			if (dval(&d) > dval(&eps))
+				goto one_digit;
+			if (dval(&d) < -dval(&eps))
+				goto no_digits;
+			goto fast_failed;
+			}
+#ifndef No_leftright
+		if (leftright) {
+			/* Use Steele & White method of only
+			 * generating digits needed.
+			 */
+			dval(&eps) = 0.5/tens[ilim-1] - dval(&eps);
+			for(i = 0;;) {
+				L = (Long)dval(&d);
+				dval(&d) -= L;
+				*s++ = '0' + (int)L;
+				if (dval(&d) < dval(&eps))
+					goto ret1;
+				if (1. - dval(&d) < dval(&eps))
+					goto bump_up;
+				if (++i >= ilim)
+					break;
+				dval(&eps) *= 10.;
+				dval(&d) *= 10.;
+				}
+			}
+		else {
+#endif
+			/* Generate ilim digits, then fix them up. */
+			dval(&eps) *= tens[ilim-1];
+			for(i = 1;; i++, dval(&d) *= 10.) {
+				L = (Long)(dval(&d));
+				if (!(dval(&d) -= L))
+					ilim = i;
+				*s++ = '0' + (int)L;
+				if (i == ilim) {
+					if (dval(&d) > 0.5 + dval(&eps))
+						goto bump_up;
+					else if (dval(&d) < 0.5 - dval(&eps)) {
+						while(*--s == '0');
+						s++;
+						goto ret1;
+						}
+					break;
+					}
+				}
+#ifndef No_leftright
+			}
+#endif
+ fast_failed:
+		s = s0;
+		dval(&d) = dval(&d2);
+		k = k0;
+		ilim = ilim0;
+		}
+
+	/* Do we have a "small" integer? */
+
+	if (be >= 0 && k <= Int_max) {
+		/* Yes. */
+		ds = tens[k];
+		if (ndigits < 0 && ilim <= 0) {
+			S = mhi = 0;
+			if (ilim < 0 || dval(&d) <= 5*ds)
+				goto no_digits;
+			goto one_digit;
+			}
+		for(i = 1;; i++, dval(&d) *= 10.) {
+			L = (Long)(dval(&d) / ds);
+			dval(&d) -= L*ds;
+#ifdef Check_FLT_ROUNDS
+			/* If FLT_ROUNDS == 2, L will usually be high by 1 */
+			if (dval(&d) < 0) {
+				L--;
+				dval(&d) += ds;
+				}
+#endif
+			*s++ = '0' + (int)L;
+			if (!dval(&d)) {
+#ifdef SET_INEXACT
+				inexact = 0;
+#endif
+				break;
+				}
+			if (i == ilim) {
+#ifdef Honor_FLT_ROUNDS
+				if (mode > 1)
+				switch(Rounding) {
+				  case 0: goto ret1;
+				  case 2: goto bump_up;
+				  }
+#endif
+				dval(&d) += dval(&d);
+#ifdef ROUND_BIASED
+				if (dval(&d) >= ds)
+#else
+				if (dval(&d) > ds || (dval(&d) == ds && L & 1))
+#endif
+					{
+ bump_up:
+					while(*--s == '9')
+						if (s == s0) {
+							k++;
+							*s = '0';
+							break;
+							}
+					++*s++;
+					}
+				break;
+				}
+			}
+		goto ret1;
+		}
+
+	m2 = b2;
+	m5 = b5;
+	mhi = mlo = 0;
+	if (leftright) {
+		i =
+#ifndef Sudden_Underflow
+			denorm ? be + (Bias + (P-1) - 1 + 1) :
+#endif
+#ifdef IBM
+			1 + 4*P - 3 - bbits + ((bbits + be - 1) & 3);
+#else
+			1 + P - bbits;
+#endif
+		b2 += i;
+		s2 += i;
+		mhi = i2b(1);
+		}
+	if (m2 > 0 && s2 > 0) {
+		i = m2 < s2 ? m2 : s2;
+		b2 -= i;
+		m2 -= i;
+		s2 -= i;
+		}
+	if (b5 > 0) {
+		if (leftright) {
+			if (m5 > 0) {
+				mhi = pow5mult(mhi, m5);
+				b1 = mult(mhi, b);
+				Bfree(b);
+				b = b1;
+				}
+			if (( j = b5 - m5 )!=0)
+				b = pow5mult(b, j);
+			}
+		else
+			b = pow5mult(b, b5);
+		}
+	S = i2b(1);
+	if (s5 > 0)
+		S = pow5mult(S, s5);
+
+	/* Check for special case that d is a normalized power of 2. */
+
+	spec_case = 0;
+	if ((mode < 2 || leftright)
+#ifdef Honor_FLT_ROUNDS
+			&& Rounding == 1
+#endif
+				) {
+		if (!word1(&d) && !(word0(&d) & Bndry_mask)
+#ifndef Sudden_Underflow
+		 && word0(&d) & (Exp_mask & ~Exp_msk1)
+#endif
+				) {
+			/* The special case */
+			b2 += Log2P;
+			s2 += Log2P;
+			spec_case = 1;
+			}
+		}
+
+	/* Arrange for convenient computation of quotients:
+	 * shift left if necessary so divisor has 4 leading 0 bits.
+	 *
+	 * Perhaps we should just compute leading 28 bits of S once
+	 * and for all and pass them and a shift to quorem, so it
+	 * can do shifts and ors to compute the numerator for q.
+	 */
+#ifdef Pack_32
+	if (( i = ((s5 ? 32 - hi0bits(S->x[S->wds-1]) : 1) + s2) & 0x1f )!=0)
+		i = 32 - i;
+#else
+	if (( i = ((s5 ? 32 - hi0bits(S->x[S->wds-1]) : 1) + s2) & 0xf )!=0)
+		i = 16 - i;
+#endif
+	if (i > 4) {
+		i -= 4;
+		b2 += i;
+		m2 += i;
+		s2 += i;
+		}
+	else if (i < 4) {
+		i += 28;
+		b2 += i;
+		m2 += i;
+		s2 += i;
+		}
+	if (b2 > 0)
+		b = lshift(b, b2);
+	if (s2 > 0)
+		S = lshift(S, s2);
+	if (k_check) {
+		if (cmp(b,S) < 0) {
+			k--;
+			b = multadd(b, 10, 0);	/* we botched the k estimate */
+			if (leftright)
+				mhi = multadd(mhi, 10, 0);
+			ilim = ilim1;
+			}
+		}
+	if (ilim <= 0 && (mode == 3 || mode == 5)) {
+		if (ilim < 0 || cmp(b,S = multadd(S,5,0)) <= 0) {
+			/* no digits, fcvt style */
+ no_digits:
+			k = -1 - ndigits;
+			goto ret;
+			}
+ one_digit:
+		*s++ = '1';
+		k++;
+		goto ret;
+		}
+	if (leftright) {
+		if (m2 > 0)
+			mhi = lshift(mhi, m2);
+
+		/* Compute mlo -- check for special case
+		 * that d is a normalized power of 2.
+		 */
+
+		mlo = mhi;
+		if (spec_case) {
+			mhi = Balloc(mhi->k);
+			Bcopy(mhi, mlo);
+			mhi = lshift(mhi, Log2P);
+			}
+
+		for(i = 1;;i++) {
+			dig = quorem(b,S) + '0';
+			/* Do we yet have the shortest decimal string
+			 * that will round to d?
+			 */
+			j = cmp(b, mlo);
+			delta = diff(S, mhi);
+			j1 = delta->sign ? 1 : cmp(b, delta);
+			Bfree(delta);
+#ifndef ROUND_BIASED
+			if (j1 == 0 && mode != 1 && !(word1(&d) & 1)
+#ifdef Honor_FLT_ROUNDS
+				&& Rounding >= 1
+#endif
+								   ) {
+				if (dig == '9')
+					goto round_9_up;
+				if (j > 0)
+					dig++;
+#ifdef SET_INEXACT
+				else if (!b->x[0] && b->wds <= 1)
+					inexact = 0;
+#endif
+				*s++ = dig;
+				goto ret;
+				}
+#endif
+			if (j < 0 || (j == 0 && mode != 1
+#ifndef ROUND_BIASED
+							&& !(word1(&d) & 1)
+#endif
+					)) {
+				if (!b->x[0] && b->wds <= 1) {
+#ifdef SET_INEXACT
+					inexact = 0;
+#endif
+					goto accept_dig;
+					}
+#ifdef Honor_FLT_ROUNDS
+				if (mode > 1)
+				 switch(Rounding) {
+				  case 0: goto accept_dig;
+				  case 2: goto keep_dig;
+				  }
+#endif /*Honor_FLT_ROUNDS*/
+				if (j1 > 0) {
+					b = lshift(b, 1);
+					j1 = cmp(b, S);
+#ifdef ROUND_BIASED
+					if (j1 >= 0 /*)*/
+#else
+					if ((j1 > 0 || (j1 == 0 && dig & 1))
+#endif
+					&& dig++ == '9')
+						goto round_9_up;
+					}
+ accept_dig:
+				*s++ = dig;
+				goto ret;
+				}
+			if (j1 > 0) {
+#ifdef Honor_FLT_ROUNDS
+				if (!Rounding)
+					goto accept_dig;
+#endif
+				if (dig == '9') { /* possible if i == 1 */
+ round_9_up:
+					*s++ = '9';
+					goto roundoff;
+					}
+				*s++ = dig + 1;
+				goto ret;
+				}
+#ifdef Honor_FLT_ROUNDS
+ keep_dig:
+#endif
+			*s++ = dig;
+			if (i == ilim)
+				break;
+			b = multadd(b, 10, 0);
+			if (mlo == mhi)
+				mlo = mhi = multadd(mhi, 10, 0);
+			else {
+				mlo = multadd(mlo, 10, 0);
+				mhi = multadd(mhi, 10, 0);
+				}
+			}
+		}
+	else
+		for(i = 1;; i++) {
+			*s++ = dig = quorem(b,S) + '0';
+			if (!b->x[0] && b->wds <= 1) {
+#ifdef SET_INEXACT
+				inexact = 0;
+#endif
+				goto ret;
+				}
+			if (i >= ilim)
+				break;
+			b = multadd(b, 10, 0);
+			}
+
+	/* Round off last digit */
+
+#ifdef Honor_FLT_ROUNDS
+	switch(Rounding) {
+	  case 0: goto trimzeros;
+	  case 2: goto roundoff;
+	  }
+#endif
+	b = lshift(b, 1);
+	j = cmp(b, S);
+#ifdef ROUND_BIASED
+	if (j >= 0)
+#else
+	if (j > 0 || (j == 0 && dig & 1))
+#endif
+		{
+ roundoff:
+		while(*--s == '9')
+			if (s == s0) {
+				k++;
+				*s++ = '1';
+				goto ret;
+				}
+		++*s++;
+		}
+	else {
+#ifdef Honor_FLT_ROUNDS
+ trimzeros:
+#endif
+		while(*--s == '0');
+		s++;
+		}
+ ret:
+	Bfree(S);
+	if (mhi) {
+		if (mlo && mlo != mhi)
+			Bfree(mlo);
+		Bfree(mhi);
+		}
+ ret1:
+#ifdef SET_INEXACT
+	if (inexact) {
+		if (!oldinexact) {
+			word0(&d) = Exp_1 + (70 << Exp_shift);
+			word1(&d) = 0;
+			dval(&d) += 1.;
+			}
+		}
+	else if (!oldinexact)
+		clear_inexact();
+#endif
+	Bfree(b);
+	*s = 0;
+	*decpt = k + 1;
+	if (rve)
+		*rve = s;
+	return s0;
+	}
diff --git a/libraries/gdtoa/g_Qfmt.c b/libraries/gdtoa/g_Qfmt.c
new file mode 100644
index 000000000..0f0697005
--- /dev/null
+++ b/libraries/gdtoa/g_Qfmt.c
@@ -0,0 +1,119 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+#undef _0
+#undef _1
+
+/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */
+
+#ifdef IEEE_MC68k
+#define _0 0
+#define _1 1
+#define _2 2
+#define _3 3
+#endif
+#ifdef IEEE_8087
+#define _0 3
+#define _1 2
+#define _2 1
+#define _3 0
+#endif
+
+ char*	/* Format the four-ULong value at V into buf (about ndig digits); returns 0 when bufsize is too small. */
+#ifdef KR_headers
+g_Qfmt(buf, V, ndig, bufsize) char *buf; char *V; int ndig; size_t bufsize;
+#else
+g_Qfmt(char *buf, void *V, int ndig, size_t bufsize)
+#endif
+{
+	static FPI fpi0 = { 113, 1-16383-113+1, 32766 - 16383 - 113 + 1, 1, 0, Int_max };
+	char *b, *s, *se;
+	ULong bits[4], *L, sign;
+	int decpt, ex, i, mode;
+#ifdef Honor_FLT_ROUNDS
+#include "gdtoa_fltrnds.h"
+#else
+#define fpi &fpi0
+#endif
+
+	if (ndig < 0)
+		ndig = 0;	/* negative requests are treated as 0 */
+	if (bufsize < (size_t)(ndig + 10))
+		return 0;
+
+	L = (ULong*)V;
+	sign = L[_0] & 0x80000000L;	/* L[_0] is the word that carries the sign and exponent */
+	bits[3] = L[_0] & 0xffff;	/* its low 16 bits become the top of the significand */
+	bits[2] = L[_1];
+	bits[1] = L[_2];
+	bits[0] = L[_3];
+	b = buf;
+	if ( (ex = (L[_0] & 0x7fff0000L) >> 16) !=0) {
+		if (ex == 0x7fff) {
+			/* Infinity or NaN */
+			if (bits[0] | bits[1] | bits[2] | bits[3])
+				b = strcp(b, "NaN");	/* no sign is written for NaN */
+			else {
+				b = buf;
+				if (sign)
+					*b++ = '-';
+				b = strcp(b, "Infinity");
+				}
+			return b;
+			}
+		i = STRTOG_Normal;
+		bits[3] |= 0x10000;	/* nonzero exponent: turn on the bit just above the 16 stored bits */
+		}
+	else if (bits[0] | bits[1] | bits[2] | bits[3]) {
+		i = STRTOG_Denormal;
+		ex = 1;
+		}
+	else {
+#ifndef IGNORE_ZERO_SIGN
+		if (sign)
+			*b++ = '-';
+#endif
+		*b++ = '0';
+		*b = 0;
+		return b;	/* points at the terminating NUL just written */
+		}
+	ex -= 0x3fff + 112;	/* presumably removes the exponent bias and accounts for the 112 stored fraction bits -- confirm */
+	mode = 2;
+	if (ndig <= 0) {
+		if (bufsize < 48)
+			return 0;	/* mode 0 is only attempted with at least 48 bytes of buffer */
+		mode = 0;
+		}
+	s = gdtoa(fpi, ex, bits, &i, mode, ndig, &decpt, &se);
+	return g__fmt(buf, s, se, decpt, sign, bufsize);
+	}
diff --git a/libraries/gdtoa/g__fmt.c b/libraries/gdtoa/g__fmt.c
new file mode 100644
index 000000000..652c82b68
--- /dev/null
+++ b/libraries/gdtoa/g__fmt.c
@@ -0,0 +1,203 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+#ifdef USE_LOCALE
+#include "locale.h"
+#endif
+
+#ifndef ldus_QNAN0
+#define ldus_QNAN0 0x7fff
+#endif
+#ifndef ldus_QNAN1
+#define ldus_QNAN1 0xc000
+#endif
+#ifndef ldus_QNAN2
+#define ldus_QNAN2 0
+#endif
+#ifndef ldus_QNAN3
+#define ldus_QNAN3 0
+#endif
+#ifndef ldus_QNAN4
+#define ldus_QNAN4 0
+#endif
+
+ const char *InfName[6] = { "Infinity", "infinity", "INFINITY", "Inf", "inf", "INF" };	/* long and short spellings in mixed, lower and upper case; not referenced elsewhere in g__fmt.c */
+ const char *NanName[3] = { "NaN", "nan", "NAN" };	/* same three capitalisations for NaN */
+ ULong NanDflt_Q_D2A[4] = { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff };	/* presumably the default NaN bit pattern for the Q (quad) format -- TODO confirm against its users */
+ ULong NanDflt_d_D2A[2] = { d_QNAN1, d_QNAN0 };	/* presumably the default NaN words for double; d_QNAN* are defined outside this file */
+ ULong NanDflt_f_D2A[1] = { f_QNAN };	/* presumably the default NaN word for float */
+ ULong NanDflt_xL_D2A[3] = { 1, 0x80000000, 0x7fff0000 };	/* presumably the default NaN words for the xL extended format */
+ UShort NanDflt_ldus_D2A[5] = { ldus_QNAN4, ldus_QNAN3, ldus_QNAN2, ldus_QNAN1, ldus_QNAN0 };	/* built from the ldus_QNAN* macros, which default to the values #defined above */
+
+ char *
+#ifdef KR_headers
+g__fmt(b, s, se, decpt, sign, blen) char *b; char *s; char *se; int decpt; ULong sign; size_t blen;
+#else
+g__fmt(char *b, char *s, char *se, int decpt, ULong sign, size_t blen)
+#endif
+{	/* Lay out the digit string s..se with decimal exponent decpt in b (size blen); frees s; returns the NUL position or 0. */
+	int i, j, k;
+	char *be, *s0;
+	size_t len;
+#ifdef USE_LOCALE
+#ifdef NO_LOCALE_CACHE
+	char *decimalpoint = localeconv()->decimal_point;
+	size_t dlen = strlen(decimalpoint);
+#else
+	char *decimalpoint;
+	static char *decimalpoint_cache;	/* copy of the locale's decimal point, filled on first use */
+	static size_t dlen;
+	if (!(s0 = decimalpoint_cache)) {
+		s0 = localeconv()->decimal_point;
+		dlen = strlen(s0);
+		if ((decimalpoint_cache = (char*)MALLOC(strlen(s0) + 1))) {	/* if MALLOC fails, the uncached localeconv() string is used */
+			strcpy(decimalpoint_cache, s0);
+			s0 = decimalpoint_cache;
+			}
+		}
+	decimalpoint = s0;
+#endif
+#else
+#define dlen 0
+#endif
+	s0 = s;	/* remember the start of the digit string so it can be freed at ret */
+	len = (se-s) + dlen + 6; /* 6 = sign + e+dd + trailing null */
+	if (blen < len)
+		goto ret0;
+	be = b + blen - 1;	/* last usable position in the output buffer */
+	if (sign)
+		*b++ = '-';
+	if (decpt <= -4 || decpt > se - s + 5) {	/* exponential notation: d[.ddd]e+xx */
+		*b++ = *s++;
+		if (*s) {	/* more than one digit: emit the decimal point and the remaining digits */
+#ifdef USE_LOCALE
+			while((*b = *decimalpoint++))
+				++b;
+#else
+			*b++ = '.';
+#endif
+			while((*b = *s++) !=0)
+				b++;
+			}
+		*b++ = 'e';
+		/* sprintf(b, "%+.2d", decpt - 1); */
+		if (--decpt < 0) {
+			*b++ = '-';
+			decpt = -decpt;
+			}
+		else
+			*b++ = '+';
+		for(j = 2, k = 10; 10*k <= decpt; j++, k *= 10){}	/* j = exponent digits to print (at least 2), k = 10^(j-1) */
+		for(;;) {
+			i = decpt / k;	/* next exponent digit, most significant first */
+			if (b >= be)
+				goto ret0;	/* keep room for the terminating NUL */
+			*b++ = i + '0';
+			if (--j <= 0)
+				break;
+			decpt -= i*k;
+			decpt *= 10;	/* shift the remaining digits up so k can stay fixed */
+			}
+		*b = 0;
+		}
+	else if (decpt <= 0) {	/* value below 1: decimal point, -decpt zeros, then the digits (no leading "0") */
+#ifdef USE_LOCALE
+		while((*b = *decimalpoint++))
+			++b;
+#else
+		*b++ = '.';
+#endif
+		if (be < b - decpt + (se - s))	/* zeros + digits + NUL must fit */
+			goto ret0;
+		for(; decpt < 0; decpt++)
+			*b++ = '0';
+		while((*b = *s++) != 0)
+			b++;
+		}
+	else {	/* fixed notation with the decimal point inside or after the digits */
+		while((*b = *s++) != 0) {
+			b++;
+			if (--decpt == 0 && *s) {	/* integer part done and fraction digits remain */
+#ifdef USE_LOCALE
+				while(*b = *decimalpoint++)
+					++b;
+#else
+				*b++ = '.';
+#endif
+				}
+			}
+		if (b + decpt > be) {	/* the remaining decpt zeros plus NUL would not fit */
+ ret0:	/* shared failure exit, also reached by goto from the branches above */
+			b = 0;
+			goto ret;
+			}
+		for(; decpt > 0; decpt--)	/* pad with zeros up to the decimal point position */
+			*b++ = '0';
+		*b = 0;
+		}
+ ret:
+	freedtoa(s0);	/* the digit string is released on success and on failure */
+	return b;
+ 	}
+
+ char *
+add_nanbits_D2A(char *b, size_t blen, ULong *bits, int nb)
+{	/* Append "(hex digits)" for the nonzero words of bits[0..nb-1] at b; returns the NUL position, or b unchanged. */
+	static char Hexdig[16] = "0123456789abcdef";
+	size_t need;
+	ULong word;
+	char *end;
+	int k, nib;
+
+	for(;;) {	/* find the highest nonzero word; if every word is zero nothing is appended */
+		if (bits[--nb])
+			break;
+		if (nb == 0)
+			return b;
+		}
+	need = 8*nb + 3;	/* 8 hex digits per lower word, plus '(' and ')' and the NUL */
+	for(word = bits[nb]; need++, (word >>= 4) != 0;){}	/* one more per hex digit of the top word */
+	if (need > blen)
+		return b;
+	end = b + need - 1;	/* position of the terminating NUL */
+	*end = 0;
+	b = end;	/* the text is filled in backwards from here */
+	*--b = /*(*/ ')';
+	for(k = 0; k < nb; k++)
+		for(word = bits[k], nib = 8; nib > 0; nib--, word >>= 4)
+			*--b = Hexdig[word & 0xf];
+	word = bits[nb];
+	do *--b = Hexdig[word & 0xf]; while((word >>= 4) != 0);
+	*--b = '('; /*)*/
+	return end;
+	}
diff --git a/libraries/gdtoa/g_ddfmt.c b/libraries/gdtoa/g_ddfmt.c
new file mode 100644
index 000000000..5ce4a076b
--- /dev/null
+++ b/libraries/gdtoa/g_ddfmt.c
@@ -0,0 +1,171 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg@acm.org). */
+
+#include "gdtoaimp.h"
+#include <string.h>
+
+ char *
+#ifdef KR_headers
+g_ddfmt(buf, dd0, ndig, bufsize) char *buf; double *dd0; int ndig; size_t bufsize;
+#else
+g_ddfmt(char *buf, double *dd0, int ndig, size_t bufsize)
+#endif
+{	/* Write the decimal text of dd0[0] + dd0[1] (a pair of doubles) into buf; returns the end of the text or 0. */
+	FPI fpi;
+	char *b, *s, *se;
+	ULong *L, bits0[4], *bits, *zx;
+	int bx, by, decpt, ex, ey, i, j, mode;
+	Bigint *x, *y, *z;
+	U *dd, ddx[2];
+#ifdef Honor_FLT_ROUNDS /*{{*/
+	int Rounding;
+#ifdef Trust_FLT_ROUNDS /*{{ only define this if FLT_ROUNDS really works! */
+	Rounding = Flt_Rounds;
+#else /*}{*/
+	Rounding = 1;
+	switch(fegetround()) {
+	  case FE_TOWARDZERO:	Rounding = 0; break;
+	  case FE_UPWARD:	Rounding = 2; break;
+	  case FE_DOWNWARD:	Rounding = 3;
+	  }
+#endif /*}}*/
+#else /*}{*/
+#define Rounding FPI_Round_near
+#endif /*}}*/
+
+	if (bufsize < 10 || (ndig > 0 && bufsize < (size_t)ndig + 8))	/* ndig <= 0 selects mode 0 below, so it must not inflate the size check */
+		return 0;
+
+	dd = (U*)dd0;
+	L = dd->L;	/* L[0..1] are the words of dd[0], L[2..3] those of dd[1] */
+	if ((L[_0] & 0x7ff00000L) == 0x7ff00000L) {
+		/* Infinity or NaN */
+		if (L[_0] & 0xfffff || L[_1]) {
+ nanret:
+			return strcp(buf, "NaN");
+			}
+		if ((L[2+_0] & 0x7ff00000) == 0x7ff00000) {
+			if (L[2+_0] & 0xfffff || L[2+_1])
+				goto nanret;
+			if ((L[_0] ^ L[2+_0]) & 0x80000000L)
+				goto nanret;	/* Infinity - Infinity */
+			}
+ infret:
+		b = buf;
+		if (L[_0] & 0x80000000L)
+			*b++ = '-';
+		return strcp(b, "Infinity");
+		}
+	if ((L[2+_0] & 0x7ff00000) == 0x7ff00000) {	/* only the second component is Infinity or NaN */
+		L += 2;
+		if (L[_0] & 0xfffff || L[_1])
+			goto nanret;
+		goto infret;
+		}
+	if (dval(&dd[0]) + dval(&dd[1]) == 0.) {
+		b = buf;
+#ifndef IGNORE_ZERO_SIGN
+		if (L[_0] & L[2+_0] & 0x80000000L)	/* "-0" only when both components are negative */
+			*b++ = '-';
+#endif
+		*b++ = '0';
+		*b = 0;
+		return b;
+		}
+	if ((L[_0] & 0x7ff00000L) < (L[2+_0] & 0x7ff00000L)) {	/* order the pair so dd[0] has the larger exponent field */
+		dval(&ddx[1]) = dval(&dd[0]);
+		dval(&ddx[0]) = dval(&dd[1]);
+		dd = ddx;
+		L = dd->L;
+		}
+	z = d2b(dval(&dd[0]), &ex, &bx);
+	if (dval(&dd[1]) == 0.) {
+		z->sign = (L[_0] & 0x80000000L) != 0;	/* only the sum/diff paths below set the sign, so record dd[0]'s sign here */
+		goto no_y;
+		}
+	x = z;
+	y = d2b(dval(&dd[1]), &ey, &by);
+	if ( (i = ex - ey) > 0) {	/* bring both integers to the smaller binary exponent */
+		x = lshift(x, i);
+		ex = ey;
+		}
+	else if (i < 0)
+		y = lshift(y, -i);
+	if ((L[_0] ^ L[2+_0]) & 0x80000000L) {	/* opposite signs: subtract magnitudes */
+		z = diff(x, y);
+		if (L[_0] & 0x80000000L)
+			z->sign = 1 - z->sign;
+		}
+	else {	/* same sign: add magnitudes */
+		z = sum(x, y);
+		if (L[_0] & 0x80000000L)
+			z->sign = 1;
+		}
+	Bfree(x);
+	Bfree(y);
+ no_y:
+	bits = zx = z->x;
+	for(i = 0; !*zx; zx++)	/* skip whole zero words at the low end, 32 bits each */
+		i += 32;
+	i += lo0bits(zx);	/* presumably the count of trailing zero bits in the first nonzero word */
+	if (i) {
+		rshift(z, i);
+		ex += i;
+		}
+	fpi.nbits = z->wds * 32 - hi0bits(z->x[j = z->wds-1]);
+	if (fpi.nbits < 106) {
+		fpi.nbits = 106;
+		if (j < 3) {	/* fewer than 4 words: hand gdtoa a zero-padded 4-word copy */
+			for(i = 0; i <= j; i++)
+				bits0[i] = bits[i];
+			while(i < 4)
+				bits0[i++] = 0;
+			bits = bits0;
+			}
+		}
+	mode = 2;	/* gdtoa mode 2: max(1,ndig) significant digits */
+	if (ndig <= 0) {
+		if (bufsize < (size_t)((int)(fpi.nbits * .301029995664) + 10)) {
+			Bfree(z);
+			return 0;
+			}
+		mode = 0;	/* gdtoa mode 0: shortest string that reads back to the same value */
+		}
+	fpi.emin = 1-1023-53+1;
+	fpi.emax = 2046-1023-106+1;
+	fpi.rounding = Rounding;
+	fpi.sudden_underflow = 0;
+	fpi.int_max = Int_max;
+	i = STRTOG_Normal;
+	s = gdtoa(&fpi, ex, bits, &i, mode, ndig, &decpt, &se);
+	b = g__fmt(buf, s, se, decpt, z->sign, bufsize);	/* g__fmt also releases s */
+	Bfree(z);
+	return b;
+	}
diff --git a/libraries/gdtoa/g_dfmt.c b/libraries/gdtoa/g_dfmt.c
new file mode 100644
index 000000000..d8e1438c4
--- /dev/null
+++ b/libraries/gdtoa/g_dfmt.c
@@ -0,0 +1,95 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ char*
+#ifdef KR_headers
+g_dfmt(buf, d, ndig, bufsize) char *buf; double *d; int ndig; size_t bufsize;
+#else
+g_dfmt(char *buf, double *d, int ndig, size_t bufsize)
+#endif
+{	/* Write the double *d into buf as decimal text; returns the end of the text, or 0 if buf is too small. */
+	static FPI fpi0 = { 53, 1-1023-53+1, 2046-1023-53+1, 1, 0, Int_max };	/* presumably { nbits, emin, emax, rounding, sudden_underflow, int_max } -- confirm in gdtoa.h */
+	char *b, *s, *se;
+	ULong bits[2], *L, sign;
+	int decpt, ex, i, mode;
+#ifdef Honor_FLT_ROUNDS	/* NOTE(review): the included header is not visible here; it is expected to supply `fpi` */
+#include "gdtoa_fltrnds.h"
+#else
+#define fpi &fpi0
+#endif
+
+	if (ndig < 0)	/* a negative digit count is treated as 0 (mode 0 below) */
+		ndig = 0;
+	if (bufsize < (size_t)(ndig + 10))
+		return 0;
+
+	L = (ULong*)d;	/* the double is read as two ULong words; _0/_1 are not defined in this file (presumably in gdtoaimp.h) */
+	sign = L[_0] & 0x80000000L;
+	if ((L[_0] & 0x7ff00000) == 0x7ff00000) {
+		/* Infinity or NaN */
+		if (bufsize < 10)	/* cannot trigger: bufsize >= ndig + 10 >= 10 was already checked above */
+			return 0;
+		if (L[_0] & 0xfffff || L[_1]) {	/* any fraction bit set ==> NaN (written without a sign) */
+			return strcp(buf, "NaN");
+			}
+		b = buf;
+		if (sign)
+			*b++ = '-';
+		return strcp(b, "Infinity");
+		}
+	if (L[_1] == 0 && (L[_0] ^ sign) == 0 /*d == 0.*/) {	/* every bit except possibly the sign is zero */
+		b = buf;
+#ifndef IGNORE_ZERO_SIGN
+		if (L[_0] & 0x80000000L)
+			*b++ = '-';
+#endif
+		*b++ = '0';
+		*b = 0;
+		return b;	/* points at the terminating NUL */
+		}
+	bits[0] = L[_1];
+	bits[1] = L[_0] & 0xfffff;	/* top 20 fraction bits */
+	if ( (ex = (L[_0] >> 20) & 0x7ff) !=0)
+		bits[1] |= 0x100000;	/* make the implicit leading significand bit explicit */
+	else
+		ex = 1;	/* exponent field 0 (subnormal) is scaled like exponent field 1 */
+	ex -= 0x3ff + 52;	/* remove the bias and treat bits[] as an integer: value = bits * 2^ex */
+	mode = 2;	/* gdtoa mode 2: max(1,ndig) significant digits */
+	if (ndig <= 0)
+		mode = 0;	/* gdtoa mode 0: shortest string that reads back to the same value */
+	i = STRTOG_Normal;
+	if (sign)
+		i = STRTOG_Normal | STRTOG_Neg;	/* gdtoa uses STRTOG_Neg to mirror a directed rounding mode */
+	s = gdtoa(fpi, ex, bits, &i, mode, ndig, &decpt, &se);
+	return g__fmt(buf, s, se, decpt, sign, bufsize);	/* g__fmt lays out the digits and releases s */
+	}
diff --git a/libraries/gdtoa/g_ffmt.c b/libraries/gdtoa/g_ffmt.c
new file mode 100644
index 000000000..30b53ae7e
--- /dev/null
+++ b/libraries/gdtoa/g_ffmt.c
@@ -0,0 +1,93 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ char*
+#ifdef KR_headers
+g_ffmt(buf, f, ndig, bufsize) char *buf; float *f; int ndig; size_t bufsize;
+#else
+g_ffmt(char *buf, float *f, int ndig, size_t bufsize)
+#endif
+{	/* Write the float *f into buf as decimal text; returns the end of the text, or 0 if buf is too small. */
+	static FPI fpi0 = { 24, 1-127-24+1,  254-127-24+1, 1, 0, 6 };	/* presumably { nbits, emin, emax, rounding, sudden_underflow, int_max } -- confirm in gdtoa.h */
+	char *b, *s, *se;
+	ULong bits[1], *L, sign;
+	int decpt, ex, i, mode;
+#ifdef Honor_FLT_ROUNDS	/* NOTE(review): the included header is not visible here; it is expected to supply `fpi` */
+#include "gdtoa_fltrnds.h"
+#else
+#define fpi &fpi0
+#endif
+
+	if (ndig < 0)	/* a negative digit count is treated as 0 (mode 0 below) */
+		ndig = 0;
+	if (bufsize < (size_t)(ndig + 10))
+		return 0;
+
+	L = (ULong*)f;	/* the float is read as a single ULong word */
+	sign = L[0] & 0x80000000L;
+	if ((L[0] & 0x7f800000) == 0x7f800000) {
+		/* Infinity or NaN */
+		if (L[0] & 0x7fffff) {	/* any fraction bit set ==> NaN (written without a sign) */
+			return strcp(buf, "NaN");
+			}
+		b = buf;
+		if (sign)
+			*b++ = '-';
+		return strcp(b, "Infinity");
+		}
+	if (*f == 0.) {
+		b = buf;
+#ifndef IGNORE_ZERO_SIGN
+		if (L[0] & 0x80000000L)
+			*b++ = '-';
+#endif
+		*b++ = '0';
+		*b = 0;
+		return b;	/* points at the terminating NUL */
+		}
+	bits[0] = L[0] & 0x7fffff;	/* 23 fraction bits */
+	if ( (ex = (L[0] >> 23) & 0xff) !=0)
+		bits[0] |= 0x800000;	/* make the implicit leading significand bit explicit */
+	else
+		ex = 1;	/* exponent field 0 (subnormal) is scaled like exponent field 1 */
+	ex -= 0x7f + 23;	/* remove the bias and treat bits[] as an integer: value = bits * 2^ex */
+	mode = 2;	/* gdtoa mode 2: max(1,ndig) significant digits */
+	if (ndig <= 0) {
+		if (bufsize < 16)
+			return 0;
+		mode = 0;	/* gdtoa mode 0: shortest string that reads back to the same value */
+		}
+	i = STRTOG_Normal;
+	s = gdtoa(fpi, ex, bits, &i, mode, ndig, &decpt, &se);
+	return g__fmt(buf, s, se, decpt, sign, bufsize);	/* g__fmt lays out the digits and releases s */
+	}
diff --git a/libraries/gdtoa/g_xLfmt.c b/libraries/gdtoa/g_xLfmt.c
new file mode 100644
index 000000000..5cda8d59e
--- /dev/null
+++ b/libraries/gdtoa/g_xLfmt.c
@@ -0,0 +1,113 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+#undef _0
+#undef _1
+
+/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */
+
+#ifdef IEEE_MC68k
+#define _0 0
+#define _1 1
+#define _2 2
+#endif
+#ifdef IEEE_8087
+#define _0 2
+#define _1 1
+#define _2 0
+#endif
+
+ char*
+#ifdef KR_headers
+g_xLfmt(buf, V, ndig, bufsize) char *buf; char *V; int ndig; size_t bufsize;
+#else
+g_xLfmt(char *buf, void *V, int ndig, size_t bufsize)
+#endif
+{	/* Write the extended value at V (three ULong words: sign/exponent word plus a 64-bit significand) into buf as decimal text. */
+	static FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, 0, Int_max };	/* presumably { nbits, emin, emax, rounding, sudden_underflow, int_max } -- confirm in gdtoa.h */
+	char *b, *s, *se;
+	ULong bits[2], *L, sign;
+	int decpt, ex, i, mode;
+#ifdef Honor_FLT_ROUNDS	/* NOTE(review): the included header is not visible here; it is expected to supply `fpi` */
+#include "gdtoa_fltrnds.h"
+#else
+#define fpi &fpi0
+#endif
+
+	if (ndig < 0)	/* a negative digit count is treated as 0 (mode 0 below) */
+		ndig = 0;
+	if (bufsize < (size_t)(ndig + 10))
+		return 0;
+
+	L = (ULong*)V;	/* _0 indexes the word whose upper half holds sign and exponent */
+	sign = L[_0] & 0x80000000L;
+	bits[1] = L[_1];	/* significand, bits[] is least-significant word first */
+	bits[0] = L[_2];
+	if ( (ex = (L[_0] >> 16) & 0x7fff) !=0) {
+		if (ex == 0x7fff) {
+			/* Infinity or NaN */
+			if (bits[0] | bits[1])	/* any significand bit set ==> NaN (written without a sign) */
+				b = strcp(buf, "NaN");
+			else {
+				b = buf;
+				if (sign)
+					*b++ = '-';
+				b = strcp(b, "Infinity");
+				}
+			return b;
+			}
+		i = STRTOG_Normal;
+		}
+	else if (bits[0] | bits[1]) {
+		i = STRTOG_Denormal;	/* NOTE(review): ex stays 0 here, whereas g_xfmt's denormal branch sets ex = 1 -- confirm this is intended for this format */
+		}
+	else {	/* exponent and significand all zero: the value is zero */
+		b = buf;
+#ifndef IGNORE_ZERO_SIGN
+		if (sign)
+			*b++ = '-';
+#endif
+		*b++ = '0';
+		*b = 0;
+		return b;	/* points at the terminating NUL */
+		}
+	ex -= 0x3fff + 63;	/* remove the bias and treat bits[] as an integer: value = bits * 2^ex */
+	mode = 2;	/* gdtoa mode 2: max(1,ndig) significant digits */
+	if (ndig <= 0) {
+		if (bufsize < 32)
+			return 0;
+		mode = 0;	/* gdtoa mode 0: shortest string that reads back to the same value */
+		}
+	s = gdtoa(fpi, ex, bits, &i, mode, ndig, &decpt, &se);
+	return g__fmt(buf, s, se, decpt, sign, bufsize);	/* g__fmt lays out the digits and releases s */
+	}
diff --git a/libraries/gdtoa/g_xfmt.c b/libraries/gdtoa/g_xfmt.c
new file mode 100644
index 000000000..a0baa518c
--- /dev/null
+++ b/libraries/gdtoa/g_xfmt.c
@@ -0,0 +1,119 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+#undef _0
+#undef _1
+
+/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */
+
+#ifdef IEEE_MC68k
+#define _0 0
+#define _1 1
+#define _2 2
+#define _3 3
+#define _4 4
+#endif
+#ifdef IEEE_8087
+#define _0 4
+#define _1 3
+#define _2 2
+#define _3 1
+#define _4 0
+#endif
+
+ char*
+#ifdef KR_headers
+g_xfmt(buf, V, ndig, bufsize) char *buf; char *V; int ndig; size_t bufsize;
+#else
+g_xfmt(char *buf, void *V, int ndig, size_t bufsize)
+#endif
+{	/* Write the extended value at V (five UShort words: sign/exponent word plus a 64-bit significand) into buf as decimal text. */
+	static FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, 0, Int_max };	/* presumably { nbits, emin, emax, rounding, sudden_underflow, int_max } -- confirm in gdtoa.h */
+	char *b, *s, *se;
+	ULong bits[2], sign;
+	UShort *L;
+	int decpt, ex, i, mode;
+#ifdef Honor_FLT_ROUNDS	/* NOTE(review): the included header is not visible here; it is expected to supply `fpi` */
+#include "gdtoa_fltrnds.h"
+#else
+#define fpi &fpi0
+#endif
+
+	if (ndig < 0)	/* a negative digit count is treated as 0 (mode 0 below) */
+		ndig = 0;
+	if (bufsize < (size_t)(ndig + 10))
+		return 0;
+
+	L = (UShort *)V;	/* _0 indexes the 16-bit word holding sign and exponent */
+	sign = L[_0] & 0x8000;
+	bits[1] = ((ULong)L[_1] << 16) | L[_2];	/* widen before shifting: a UShort promotes to int, and int << 16 overflows for values >= 0x8000 */
+	bits[0] = ((ULong)L[_3] << 16) | L[_4];
+	if ( (ex = L[_0] & 0x7fff) !=0) {
+		if (ex == 0x7fff) {
+			/* Infinity or NaN */
+			if (!bits[0] && bits[1]== 0x80000000) {	/* only the top significand bit set ==> Infinity */
+				b = buf;
+				if (sign)
+					*b++ = '-';
+				b = strcp(b, "Infinity");
+				}
+			else
+				b = strcp(buf, "NaN");
+			return b;
+			}
+		i = STRTOG_Normal;
+		}
+	else if (bits[0] | bits[1]) {
+		i = STRTOG_Denormal;
+		ex = 1;	/* exponent field 0 with a nonzero significand is scaled like exponent field 1 */
+		}
+	else {	/* exponent and significand all zero: the value is zero */
+		b = buf;
+#ifndef IGNORE_ZERO_SIGN
+		if (sign)
+			*b++ = '-';
+#endif
+		*b++ = '0';
+		*b = 0;
+		return b;	/* points at the terminating NUL */
+		}
+	ex -= 0x3fff + 63;	/* remove the bias and treat bits[] as an integer: value = bits * 2^ex */
+	mode = 2;	/* gdtoa mode 2: max(1,ndig) significant digits */
+	if (ndig <= 0) {
+		if (bufsize < 32)
+			return 0;
+		mode = 0;	/* gdtoa mode 0: shortest string that reads back to the same value */
+		}
+	s = gdtoa(fpi, ex, bits, &i, mode, ndig, &decpt, &se);
+	return g__fmt(buf, s, se, decpt, sign, bufsize);	/* g__fmt lays out the digits and releases s */
+	}
diff --git a/libraries/gdtoa/gdtoa.c b/libraries/gdtoa/gdtoa.c
new file mode 100644
index 000000000..a4759968a
--- /dev/null
+++ b/libraries/gdtoa/gdtoa.c
@@ -0,0 +1,764 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 1999 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ static Bigint *
+#ifdef KR_headers
+bitstob(bits, nbits, bbits) ULong *bits; int nbits; int *bbits;
+#else
+bitstob(ULong *bits, int nbits, int *bbits)
+#endif
+{	/* Copy the words of bits[] that cover nbits bits into a new Bigint; *bbits is derived from its top nonzero word. */
+	Bigint *rv;
+	ULong *last, *dst, *words;
+	int span, order, top;
+
+	order = 0;	/* number of doublings of ULbits needed to reach nbits; passed to Balloc */
+	for(span = ULbits; span < nbits; span <<= 1)
+		order++;
+#ifndef Pack_32
+	if (order == 0)
+		order = 1;
+#endif
+	rv = Balloc(order);
+	words = rv->x;
+	last = bits + ((nbits - 1) >> kshift);	/* final input word to copy */
+	for(dst = words;;) {
+		*dst++ = *bits & ALL_ON;
+#ifdef Pack_16
+		*dst++ = (*bits >> 16) & ALL_ON;
+#endif
+		if (++bits > last)
+			break;
+		}
+	for(top = dst - words;;) {	/* drop high-order zero words; all zero ==> wds = 0, *bbits = 0 */
+		if (words[--top])
+			break;
+		if (top == 0) {
+			rv->wds = 0;
+			*bbits = 0;
+			return rv;
+			}
+		}
+	rv->wds = top + 1;
+	*bbits = top*ULbits + 32 - hi0bits(rv->x[top]);	/* presumably the bit length of the value -- confirm hi0bits counts leading zero bits */
+	return rv;
+	}
+
+/* dtoa for IEEE arithmetic (dmg): convert double to ASCII string.
+ *
+ * Inspired by "How to Print Floating-Point Numbers Accurately" by
+ * Guy L. Steele, Jr. and Jon L. White [Proc. ACM SIGPLAN '90, pp. 112-126].
+ *
+ * Modifications:
+ *	1. Rather than iterating, we use a simple numeric overestimate
+ *	   to determine k = floor(log10(d)).  We scale relevant
+ *	   quantities using O(log2(k)) rather than O(k) multiplications.
+ *	2. For some modes > 2 (corresponding to ecvt and fcvt), we don't
+ *	   try to generate digits strictly left to right.  Instead, we
+ *	   compute with fewer bits and propagate the carry if necessary
+ *	   when rounding the final digit up.  This is often faster.
+ *	3. Under the assumption that input will be rounded nearest,
+ *	   mode 0 renders 1e23 as 1e23 rather than 9.999999999999999e22.
+ *	   That is, we allow equality in stopping tests when the
+ *	   round-nearest rule will give the same floating-point value
+ *	   as would satisfaction of the stopping test with strict
+ *	   inequality.
+ *	4. We remove common factors of powers of 2 from relevant
+ *	   quantities.
+ *	5. When converting floating-point integers less than 1e16,
+ *	   we use floating-point arithmetic rather than resorting
+ *	   to multiple-precision integers.
+ *	6. When asked to produce fewer than 15 digits, we first try
+ *	   to get by with floating-point arithmetic; we resort to
+ *	   multiple-precision integer arithmetic only if we cannot
+ *	   guarantee that the floating-point calculation has given
+ *	   the correctly rounded result.  For k requested digits and
+ *	   "uniformly" distributed input, the probability is
+ *	   something like 10^(k-15) that we must resort to the Long
+ *	   calculation.
+ */
+
+ char *
+gdtoa
+#ifdef KR_headers
+	(fpi, be, bits, kindp, mode, ndigits, decpt, rve)
+	FPI *fpi; int be; ULong *bits;
+	int *kindp, mode, ndigits, *decpt; char **rve;
+#else
+	(FPI *fpi, int be, ULong *bits, int *kindp, int mode, int ndigits, int *decpt, char **rve)
+#endif
+{
+ /*	Arguments ndigits and decpt are similar to the second and third
+	arguments of ecvt and fcvt; trailing zeros are suppressed from
+	the returned string.  If not null, *rve is set to point
+	to the end of the return value.  If d is +-Infinity or NaN,
+	then *decpt is set to -32768.
+	be = exponent: value = (integer represented by bits) * (2 to the power of be).
+
+	mode:
+		0 ==> shortest string that yields d when read in
+			and rounded to nearest.
+		1 ==> like 0, but with Steele & White stopping rule;
+			e.g. with IEEE P754 arithmetic , mode 0 gives
+			1e23 whereas mode 1 gives 9.999999999999999e22.
+		2 ==> max(1,ndigits) significant digits.  This gives a
+			return value similar to that of ecvt, except
+			that trailing zeros are suppressed.
+		3 ==> through ndigits past the decimal point.  This
+			gives a return value similar to that from fcvt,
+			except that trailing zeros are suppressed, and
+			ndigits can be negative.
+		4-9 should give the same return values as 2-3, i.e.,
+			4 <= mode <= 9 ==> same return as mode
+			2 + (mode & 1).  These modes are mainly for
+			debugging; often they run slower but sometimes
+			faster than modes 2-3.
+		4,5,8,9 ==> left-to-right digit generation.
+		6-9 ==> don't try fast floating-point estimate
+			(if applicable).
+
+		Values of mode other than 0-9 are treated as mode 0.
+
+		Sufficient space is allocated to the return value
+		to hold the suppressed trailing zeros.
+	*/
+
+	int bbits, b2, b5, be0, dig, i, ieps, ilim, ilim0, ilim1, inex;
+	int j, j1, k, k0, k_check, kind, leftright, m2, m5, nbits;
+	int rdir, s2, s5, spec_case, try_quick;
+	Long L;
+	Bigint *b, *b1, *delta, *mlo, *mhi, *mhi1, *S;
+	double d2, ds;
+	char *s, *s0;
+	U d, eps;
+
+#ifndef MULTIPLE_THREADS
+	if (dtoa_result) {
+		freedtoa(dtoa_result);
+		dtoa_result = 0;
+		}
+#endif
+	inex = 0;
+	kind = *kindp &= ~STRTOG_Inexact;
+	switch(kind & STRTOG_Retmask) {
+	  case STRTOG_Zero:
+		goto ret_zero;
+	  case STRTOG_Normal:
+	  case STRTOG_Denormal:
+		break;
+	  case STRTOG_Infinite:
+		*decpt = -32768;
+		return nrv_alloc("Infinity", rve, 8);
+	  case STRTOG_NaN:
+		*decpt = -32768;
+		return nrv_alloc("NaN", rve, 3);
+	  default:
+		return 0;
+	  }
+	b = bitstob(bits, nbits = fpi->nbits, &bbits);
+	be0 = be;
+	if ( (i = trailz(b)) !=0) {
+		rshift(b, i);
+		be += i;
+		bbits -= i;
+		}
+	if (!b->wds) {
+		Bfree(b);
+ ret_zero:
+		*decpt = 1;
+		return nrv_alloc("0", rve, 1);
+		}
+
+	dval(&d) = b2d(b, &i);
+	i = be + bbits - 1;
+	word0(&d) &= Frac_mask1;
+	word0(&d) |= Exp_11;
+#ifdef IBM
+	if ( (j = 11 - hi0bits(word0(&d) & Frac_mask)) !=0)
+		dval(&d) /= 1 << j;
+#endif
+
+	/* log(x)	~=~ log(1.5) + (x-1.5)/1.5
+	 * log10(x)	 =  log(x) / log(10)
+	 *		~=~ log(1.5)/log(10) + (x-1.5)/(1.5*log(10))
+	 * log10(&d) = (i-Bias)*log(2)/log(10) + log10(d2)
+	 *
+	 * This suggests computing an approximation k to log10(&d) by
+	 *
+	 * k = (i - Bias)*0.301029995663981
+	 *	+ ( (d2-1.5)*0.289529654602168 + 0.176091259055681 );
+	 *
+	 * We want k to be too large rather than too small.
+	 * The error in the first-order Taylor series approximation
+	 * is in our favor, so we just round up the constant enough
+	 * to compensate for any error in the multiplication of
+	 * (i - Bias) by 0.301029995663981; since |i - Bias| <= 1077,
+	 * and 1077 * 0.30103 * 2^-52 ~=~ 7.2e-14,
+	 * adding 1e-13 to the constant term more than suffices.
+	 * Hence we adjust the constant term to 0.1760912590558.
+	 * (We could get a more accurate k by invoking log10,
+	 *  but this is probably not worthwhile.)
+	 */
+#ifdef IBM
+	i <<= 2;
+	i += j;
+#endif
+	ds = (dval(&d)-1.5)*0.289529654602168 + 0.1760912590558 + i*0.301029995663981;
+
+	/* correct assumption about exponent range */
+	if ((j = i) < 0)
+		j = -j;
+	if ((j -= 1077) > 0)
+		ds += j * 7e-17;
+
+	k = (int)ds;
+	if (ds < 0. && ds != k)
+		k--;	/* want k = floor(ds) */
+	k_check = 1;
+#ifdef IBM
+	j = be + bbits - 1;
+	if ( (j1 = j & 3) !=0)
+		dval(&d) *= 1 << j1;
+	word0(&d) += j << Exp_shift - 2 & Exp_mask;
+#else
+	word0(&d) += (be + bbits - 1) << Exp_shift;
+#endif
+	if (k >= 0 && k <= Ten_pmax) {
+		if (dval(&d) < tens[k])
+			k--;
+		k_check = 0;
+		}
+	j = bbits - i - 1;
+	if (j >= 0) {
+		b2 = 0;
+		s2 = j;
+		}
+	else {
+		b2 = -j;
+		s2 = 0;
+		}
+	if (k >= 0) {
+		b5 = 0;
+		s5 = k;
+		s2 += k;
+		}
+	else {
+		b2 -= k;
+		b5 = -k;
+		s5 = 0;
+		}
+	if (mode < 0 || mode > 9)
+		mode = 0;
+	try_quick = 1;
+	if (mode > 5) {
+		mode -= 4;
+		try_quick = 0;
+		}
+	else if (i >= -4 - Emin || i < Emin)
+		try_quick = 0;
+	leftright = 1;
+	ilim = ilim1 = -1;	/* Values for cases 0 and 1; done here to */
+				/* silence erroneous "gcc -Wall" warning. */
+	switch(mode) {
+		case 0:
+		case 1:
+			i = (int)(nbits * .30103) + 3;
+			ndigits = 0;
+			break;
+		case 2:
+			leftright = 0;
+			/* no break */
+		case 4:
+			if (ndigits <= 0)
+				ndigits = 1;
+			ilim = ilim1 = i = ndigits;
+			break;
+		case 3:
+			leftright = 0;
+			/* no break */
+		case 5:
+			i = ndigits + k + 1;
+			ilim = i;
+			ilim1 = i - 1;
+			if (i <= 0)
+				i = 1;
+		}
+	s = s0 = rv_alloc(i);
+
+	if ( (rdir = fpi->rounding - 1) !=0) {
+		if (rdir < 0)
+			rdir = 2;
+		if (kind & STRTOG_Neg)
+			rdir = 3 - rdir;
+		}
+
+	/* Now rdir = 0 ==> round near, 1 ==> round up, 2 ==> round down. */
+
+	if (ilim >= 0 && ilim <= Quick_max && try_quick && !rdir
+#ifndef IMPRECISE_INEXACT
+		&& k == 0
+#endif
+								) {
+
+		/* Try to get by with floating-point arithmetic. */
+
+		i = 0;
+		d2 = dval(&d);
+#ifdef IBM
+		if ( (j = 11 - hi0bits(word0(&d) & Frac_mask)) !=0)
+			dval(&d) /= 1 << j;
+#endif
+		k0 = k;
+		ilim0 = ilim;
+		ieps = 2; /* conservative */
+		if (k > 0) {
+			ds = tens[k&0xf];
+			j = k >> 4;
+			if (j & Bletch) {
+				/* prevent overflows */
+				j &= Bletch - 1;
+				dval(&d) /= bigtens[n_bigtens-1];
+				ieps++;
+				}
+			for(; j; j >>= 1, i++)
+				if (j & 1) {
+					ieps++;
+					ds *= bigtens[i];
+					}
+			}
+		else  {
+			ds = 1.;
+			if ( (j1 = -k) !=0) {
+				dval(&d) *= tens[j1 & 0xf];
+				for(j = j1 >> 4; j; j >>= 1, i++)
+					if (j & 1) {
+						ieps++;
+						dval(&d) *= bigtens[i];
+						}
+				}
+			}
+		if (k_check && dval(&d) < 1. && ilim > 0) {
+			if (ilim1 <= 0)
+				goto fast_failed;
+			ilim = ilim1;
+			k--;
+			dval(&d) *= 10.;
+			ieps++;
+			}
+		dval(&eps) = ieps*dval(&d) + 7.;
+		word0(&eps) -= (P-1)*Exp_msk1;
+		if (ilim == 0) {
+			S = mhi = 0;
+			dval(&d) -= 5.;
+			if (dval(&d) > dval(&eps))
+				goto one_digit;
+			if (dval(&d) < -dval(&eps))
+				goto no_digits;
+			goto fast_failed;
+			}
+#ifndef No_leftright
+		if (leftright) {
+			/* Use Steele & White method of only
+			 * generating digits needed.
+			 */
+			dval(&eps) = ds*0.5/tens[ilim-1] - dval(&eps);
+			for(i = 0;;) {
+				L = (Long)(dval(&d)/ds);
+				dval(&d) -= L*ds;
+				*s++ = '0' + (int)L;
+				if (dval(&d) < dval(&eps)) {
+					if (dval(&d))
+						inex = STRTOG_Inexlo;
+					goto ret1;
+					}
+				if (ds - dval(&d) < dval(&eps))
+					goto bump_up;
+				if (++i >= ilim)
+					break;
+				dval(&eps) *= 10.;
+				dval(&d) *= 10.;
+				}
+			}
+		else {
+#endif
+			/* Generate ilim digits, then fix them up. */
+			dval(&eps) *= tens[ilim-1];
+			for(i = 1;; i++, dval(&d) *= 10.) {
+				if ( (L = (Long)(dval(&d)/ds)) !=0)
+					dval(&d) -= L*ds;
+				*s++ = '0' + (int)L;
+				if (i == ilim) {
+					ds *= 0.5;
+					if (dval(&d) > ds + dval(&eps))
+						goto bump_up;
+					else if (dval(&d) < ds - dval(&eps)) {
+						if (dval(&d))
+							inex = STRTOG_Inexlo;
+						goto clear_trailing0;
+						}
+					break;
+					}
+				}
+#ifndef No_leftright
+			}
+#endif
+ fast_failed:
+		s = s0;
+		dval(&d) = d2;
+		k = k0;
+		ilim = ilim0;
+		}
+
+	/* Do we have a "small" integer? */
+
+	if (be >= 0 && k <= fpi->int_max) {
+		/* Yes. */
+		ds = tens[k];
+		if (ndigits < 0 && ilim <= 0) {
+			S = mhi = 0;
+			if (ilim < 0 || dval(&d) <= 5*ds)
+				goto no_digits;
+			goto one_digit;
+			}
+		for(i = 1;; i++, dval(&d) *= 10.) {
+			L = (Long)(dval(&d) / ds);
+			dval(&d) -= L*ds;
+#ifdef Check_FLT_ROUNDS
+			/* If FLT_ROUNDS == 2, L will usually be high by 1 */
+			if (dval(&d) < 0) {
+				L--;
+				dval(&d) += ds;
+				}
+#endif
+			*s++ = '0' + (int)L;
+			if (dval(&d) == 0.)
+				break;
+			if (i == ilim) {
+				if (rdir) {
+					if (rdir == 1)
+						goto bump_up;
+					inex = STRTOG_Inexlo;
+					goto ret1;
+					}
+				dval(&d) += dval(&d);
+#ifdef ROUND_BIASED
+				if (dval(&d) >= ds)
+#else
+				if (dval(&d) > ds || (dval(&d) == ds && L & 1))
+#endif
+					{
+ bump_up:
+					inex = STRTOG_Inexhi;
+					while(*--s == '9')
+						if (s == s0) {
+							k++;
+							*s = '0';
+							break;
+							}
+					++*s++;
+					}
+				else {
+					inex = STRTOG_Inexlo;
+ clear_trailing0:
+					while(*--s == '0'){}
+					++s;
+					}
+				break;
+				}
+			}
+		goto ret1;
+		}
+
+	m2 = b2;
+	m5 = b5;
+	mhi = mlo = 0;
+	if (leftright) {
+		i = nbits - bbits;
+		if (be - i++ < fpi->emin && mode != 3 && mode != 5) {
+			/* denormal */
+			i = be - fpi->emin + 1;
+			if (mode >= 2 && ilim > 0 && ilim < i)
+				goto small_ilim;
+			}
+		else if (mode >= 2) {
+ small_ilim:
+			j = ilim - 1;
+			if (m5 >= j)
+				m5 -= j;
+			else {
+				s5 += j -= m5;
+				b5 += j;
+				m5 = 0;
+				}
+			if ((i = ilim) < 0) {
+				m2 -= i;
+				i = 0;
+				}
+			}
+		b2 += i;
+		s2 += i;
+		mhi = i2b(1);
+		}
+	if (m2 > 0 && s2 > 0) {
+		i = m2 < s2 ? m2 : s2;
+		b2 -= i;
+		m2 -= i;
+		s2 -= i;
+		}
+	if (b5 > 0) {
+		if (leftright) {
+			if (m5 > 0) {
+				mhi = pow5mult(mhi, m5);
+				b1 = mult(mhi, b);
+				Bfree(b);
+				b = b1;
+				}
+			if ( (j = b5 - m5) !=0)
+				b = pow5mult(b, j);
+			}
+		else
+			b = pow5mult(b, b5);
+		}
+	S = i2b(1);
+	if (s5 > 0)
+		S = pow5mult(S, s5);
+
+	/* Check for special case that d is a normalized power of 2. */
+
+	spec_case = 0;
+	if (mode < 2) {
+		if (bbits == 1 && be0 > fpi->emin + 1) {
+			/* The special case */
+			b2++;
+			s2++;
+			spec_case = 1;
+			}
+		}
+
+	/* Arrange for convenient computation of quotients:
+	 * shift left if necessary so divisor has 4 leading 0 bits.
+	 *
+	 * Perhaps we should just compute leading 28 bits of S once
+	 * and for all and pass them and a shift to quorem, so it
+	 * can do shifts and ors to compute the numerator for q.
+	 */
+	i = ((s5 ? hi0bits(S->x[S->wds-1]) : ULbits - 1) - s2 - 4) & kmask;
+	m2 += i;
+	if ((b2 += i) > 0)
+		b = lshift(b, b2);
+	if ((s2 += i) > 0)
+		S = lshift(S, s2);
+	if (k_check) {
+		if (cmp(b,S) < 0) {
+			k--;
+			b = multadd(b, 10, 0);	/* we botched the k estimate */
+			if (leftright)
+				mhi = multadd(mhi, 10, 0);
+			ilim = ilim1;
+			}
+		}
+	if (ilim <= 0 && mode > 2) {
+		if (ilim < 0 || cmp(b,S = multadd(S,5,0)) <= 0) {
+			/* no digits, fcvt style */
+ no_digits:
+			k = -1 - ndigits;
+			inex = STRTOG_Inexlo;
+			goto ret;
+			}
+ one_digit:
+		inex = STRTOG_Inexhi;
+		*s++ = '1';
+		k++;
+		goto ret;
+		}
+	if (leftright) {
+		if (m2 > 0)
+			mhi = lshift(mhi, m2);
+
+		/* Compute mlo -- check for special case
+		 * that d is a normalized power of 2.
+		 */
+
+		mlo = mhi;
+		if (spec_case) {
+			mhi = Balloc(mhi->k);
+			Bcopy(mhi, mlo);
+			mhi = lshift(mhi, 1);
+			}
+
+		for(i = 1;;i++) {
+			dig = quorem(b,S) + '0';
+			/* Do we yet have the shortest decimal string
+			 * that will round to d?
+			 */
+			j = cmp(b, mlo);
+			delta = diff(S, mhi);
+			j1 = delta->sign ? 1 : cmp(b, delta);
+			Bfree(delta);
+#ifndef ROUND_BIASED
+			if (j1 == 0 && !mode && !(bits[0] & 1) && !rdir) {
+				if (dig == '9')
+					goto round_9_up;
+				if (j <= 0) {
+					if (b->wds > 1 || b->x[0])
+						inex = STRTOG_Inexlo;
+					}
+				else {
+					dig++;
+					inex = STRTOG_Inexhi;
+					}
+				*s++ = dig;
+				goto ret;
+				}
+#endif
+			if (j < 0 || (j == 0 && !mode
+#ifndef ROUND_BIASED
+							&& !(bits[0] & 1)
+#endif
+					)) {
+				if (rdir && (b->wds > 1 || b->x[0])) {
+					if (rdir == 2) {
+						inex = STRTOG_Inexlo;
+						goto accept;
+						}
+					while (cmp(S,mhi) > 0) {
+						*s++ = dig;
+						mhi1 = multadd(mhi, 10, 0);
+						if (mlo == mhi)
+							mlo = mhi1;
+						mhi = mhi1;
+						b = multadd(b, 10, 0);
+						dig = quorem(b,S) + '0';
+						}
+					if (dig++ == '9')
+						goto round_9_up;
+					inex = STRTOG_Inexhi;
+					goto accept;
+					}
+				if (j1 > 0) {
+					b = lshift(b, 1);
+					j1 = cmp(b, S);
+#ifdef ROUND_BIASED
+					if (j1 >= 0 /*)*/
+#else
+					if ((j1 > 0 || (j1 == 0 && dig & 1))
+#endif
+					&& dig++ == '9')
+						goto round_9_up;
+					inex = STRTOG_Inexhi;
+					}
+				if (b->wds > 1 || b->x[0])
+					inex = STRTOG_Inexlo;
+ accept:
+				*s++ = dig;
+				goto ret;
+				}
+			if (j1 > 0 && rdir != 2) {
+				if (dig == '9') { /* possible if i == 1 */
+ round_9_up:
+					*s++ = '9';
+					inex = STRTOG_Inexhi;
+					goto roundoff;
+					}
+				inex = STRTOG_Inexhi;
+				*s++ = dig + 1;
+				goto ret;
+				}
+			*s++ = dig;
+			if (i == ilim)
+				break;
+			b = multadd(b, 10, 0);
+			if (mlo == mhi)
+				mlo = mhi = multadd(mhi, 10, 0);
+			else {
+				mlo = multadd(mlo, 10, 0);
+				mhi = multadd(mhi, 10, 0);
+				}
+			}
+		}
+	else
+		for(i = 1;; i++) {
+			*s++ = dig = quorem(b,S) + '0';
+			if (i >= ilim)
+				break;
+			b = multadd(b, 10, 0);
+			}
+
+	/* Round off last digit */
+
+	if (rdir) {
+		if (rdir == 2 || (b->wds <= 1 && !b->x[0]))
+			goto chopzeros;
+		goto roundoff;
+		}
+	b = lshift(b, 1);
+	j = cmp(b, S);
+#ifdef ROUND_BIASED
+	if (j >= 0)
+#else
+	if (j > 0 || (j == 0 && dig & 1))
+#endif
+		{
+ roundoff:
+		inex = STRTOG_Inexhi;
+		while(*--s == '9')
+			if (s == s0) {
+				k++;
+				*s++ = '1';
+				goto ret;
+				}
+		++*s++;
+		}
+	else {
+ chopzeros:
+		if (b->wds > 1 || b->x[0])
+			inex = STRTOG_Inexlo;
+		while(*--s == '0'){}
+		++s;
+		}
+ ret:
+	Bfree(S);
+	if (mhi) {
+		if (mlo && mlo != mhi)
+			Bfree(mlo);
+		Bfree(mhi);
+		}
+ ret1:
+	Bfree(b);
+	*s = 0;
+	*decpt = k + 1;
+	if (rve)
+		*rve = s;
+	*kindp |= inex;
+	return s0;
+	}
diff --git a/libraries/gdtoa/gdtoa.h b/libraries/gdtoa/gdtoa.h
new file mode 100644
index 000000000..8b7390a28
--- /dev/null
+++ b/libraries/gdtoa/gdtoa.h
@@ -0,0 +1,194 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#ifndef GDTOA_H_INCLUDED
+#define GDTOA_H_INCLUDED
+
+#if defined(_MSC_VER)
+/* [RH] Generating arith.h strikes me as too cumbersome under Visual
+ * Studio, so here's the equivalent, given the limited number of
+ * architectures that MSC can target. (Itanium? Who cares about that?)
+ */
+#define IEEE_8087
+#define Arith_Kind_ASL 1
+#define Double_Align
+#ifdef _M_X64
+#define X64_bit_pointers
+#endif
+#elif defined(__APPLE__)
+/* [BL] While generating the files may be easy, on OS X we have cross
+ * compiling to deal with, which means we can't run the generation
+ * program on the target.
+ */
+#if defined(__x86_64__)
+#define IEEE_8087
+#define Arith_Kind_ASL 1
+#define Long int
+#define Intcast (int)(long)
+#define Double_Align
+#define X64_bit_pointers
+#elif defined(__i386__)
+#define IEEE_8087
+#define Arith_Kind_ASL 1
+#else
+#define IEEE_MC68k
+#define Arith_Kind_ASL 2
+#define Double_Align
+#endif
+#else
+#include "arith.h"
+#endif
+#include <stddef.h> /* for size_t */
+
+#ifndef Long
+#define Long int
+#endif
+#ifndef ULong
+typedef unsigned Long ULong;
+#endif
+#ifndef UShort
+typedef unsigned short UShort;
+#endif
+
+#ifndef ANSI
+#ifdef KR_headers
+#define ANSI(x) ()
+#define Void /*nothing*/
+#else
+#define ANSI(x) x
+#define Void void
+#endif
+#endif /* ANSI */
+
+#ifndef CONST
+#ifdef KR_headers
+#define CONST /* blank */
+#else
+#define CONST const
+#endif
+#endif /* CONST */
+
+ enum {	/* return values from strtodg; gdtoa also ors the inexact flags below into *kindp */
+	STRTOG_Zero	= 0,
+	STRTOG_Normal	= 1,
+	STRTOG_Denormal	= 2,
+	STRTOG_Infinite	= 3,
+	STRTOG_NaN	= 4,
+	STRTOG_NaNbits	= 5,
+	STRTOG_NoNumber	= 6,
+	STRTOG_Retmask	= 7,	/* mask covering the kind codes 0-6 above */
+
+	/* The following may be or-ed into one of the above values. */
+
+	STRTOG_Neg	= 0x08, /* does not affect STRTOG_Inexlo or STRTOG_Inexhi */
+	STRTOG_Inexlo	= 0x10,	/* returned result rounded toward zero */
+	STRTOG_Inexhi	= 0x20, /* returned result rounded away from zero */
+	STRTOG_Inexact	= 0x30,	/* STRTOG_Inexlo | STRTOG_Inexhi */
+	STRTOG_Underflow= 0x40,
+	STRTOG_Overflow	= 0x80
+	};
+
+ typedef struct	/* floating-point format descriptor passed to gdtoa() and strtodg() */
+FPI {
+	int nbits;	/* presumably the number of significand bits -- confirm */
+	int emin;	/* minimum exponent; gdtoa compares be against it to detect denormals */
+	int emax;	/* presumably the maximum exponent -- confirm */
+	int rounding;	/* one of the FPI_Round_* values (FLT_ROUNDS numbering) */
+	int sudden_underflow;	/* presumably nonzero if the format lacks gradual underflow -- confirm */
+	int int_max;	/* gdtoa takes its "small integer" path when be >= 0 && k <= int_max */
+	} FPI;
+
+enum {	/* FPI.rounding values: same as FLT_ROUNDS */
+	FPI_Round_zero = 0,	/* toward zero */
+	FPI_Round_near = 1,	/* to nearest */
+	FPI_Round_up = 2,	/* toward +Infinity */
+	FPI_Round_down = 3	/* toward -Infinity */
+	};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern char* dtoa  ANSI((double d, int mode, int ndigits, int *decpt,
+			int *sign, char **rve));
+extern char* gdtoa ANSI((FPI *fpi, int be, ULong *bits, int *kindp,
+			int mode, int ndigits, int *decpt, char **rve));
+extern void freedtoa ANSI((char*));
+//extern float  strtof ANSI((CONST char *, char **));
+//extern double strtod ANSI((CONST char *, char **));
+extern int strtodg ANSI((CONST char*, char**, FPI*, Long*, ULong*));
+
+extern char*	g_ddfmt   ANSI((char*, double*, int, size_t));
+extern char*	g_ddfmt_p ANSI((char*, double*,	int, size_t, int));
+extern char*	g_dfmt    ANSI((char*, double*, int, size_t));
+extern char*	g_dfmt_p  ANSI((char*, double*,	int, size_t, int));
+extern char*	g_ffmt    ANSI((char*, float*,  int, size_t));
+extern char*	g_ffmt_p  ANSI((char*, float*,	int, size_t, int));
+extern char*	g_Qfmt    ANSI((char*, void*,   int, size_t));
+extern char*	g_Qfmt_p  ANSI((char*, void*,	int, size_t, int));
+extern char*	g_xfmt    ANSI((char*, void*,   int, size_t));
+extern char*	g_xfmt_p  ANSI((char*, void*,	int, size_t, int));
+extern char*	g_xLfmt   ANSI((char*, void*,   int, size_t));
+extern char*	g_xLfmt_p ANSI((char*, void*,	int, size_t, int));
+
+extern int	strtoId  ANSI((CONST char*, char**, double*, double*));
+extern int	strtoIdd ANSI((CONST char*, char**, double*, double*));
+extern int	strtoIf  ANSI((CONST char*, char**, float*, float*));
+extern int	strtoIQ  ANSI((CONST char*, char**, void*, void*));
+extern int	strtoIx  ANSI((CONST char*, char**, void*, void*));
+extern int	strtoIxL ANSI((CONST char*, char**, void*, void*));
+extern int	strtord  ANSI((CONST char*, char**, int, double*));
+extern int	strtordd ANSI((CONST char*, char**, int, double*));
+extern int	strtorf  ANSI((CONST char*, char**, int, float*));
+extern int	strtorQ  ANSI((CONST char*, char**, int, void*));
+extern int	strtorx  ANSI((CONST char*, char**, int, void*));
+extern int	strtorxL ANSI((CONST char*, char**, int, void*));
+#if 1
+extern int	strtodI  ANSI((CONST char*, char**, double*));
+extern int	strtopd  ANSI((CONST char*, char**, double*));
+extern int	strtopdd ANSI((CONST char*, char**, double*));
+extern int	strtopf  ANSI((CONST char*, char**, float*));
+extern int	strtopQ  ANSI((CONST char*, char**, void*));
+extern int	strtopx  ANSI((CONST char*, char**, void*));
+extern int	strtopxL ANSI((CONST char*, char**, void*));
+#else
+#define strtopd(s,se,x) strtord(s,se,1,x)
+#define strtopdd(s,se,x) strtordd(s,se,1,x)
+#define strtopf(s,se,x) strtorf(s,se,1,x)
+#define strtopQ(s,se,x) strtorQ(s,se,1,x)
+#define strtopx(s,se,x) strtorx(s,se,1,x)
+#define strtopxL(s,se,x) strtorxL(s,se,1,x)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* GDTOA_H_INCLUDED */
diff --git a/libraries/gdtoa/gdtoa_fltrnds.h b/libraries/gdtoa/gdtoa_fltrnds.h
new file mode 100644
index 000000000..33e5f9e53
--- /dev/null
+++ b/libraries/gdtoa/gdtoa_fltrnds.h
@@ -0,0 +1,18 @@
+	FPI *fpi, fpi1;	/* fpi: descriptor to use; fpi1: private copy for a non-default rounding mode */
+	int Rounding;	/* rounding code in FLT_ROUNDS / FPI_Round_* numbering */
+#ifdef Trust_FLT_ROUNDS /*{{ only define this if FLT_ROUNDS really works! */
+	Rounding = Flt_Rounds;
+#else /*}{*/
+	Rounding = 1;	/* default (round to nearest) when fegetround() matches no case below */
+	switch(fegetround()) {
+	  case FE_TOWARDZERO:	Rounding = 0; break;
+	  case FE_UPWARD:	Rounding = 2; break;
+	  case FE_DOWNWARD:	Rounding = 3;	/* last case, so no break is needed */
+	  }
+#endif /*}}*/
+	fpi = &fpi0;	/* fpi0 is not declared here; it must be supplied by the including code */
+	if (Rounding != 1) {	/* override the rounding mode on a copy, leaving fpi0 unmodified */
+		fpi1 = fpi0;
+		fpi = &fpi1;
+		fpi1.rounding = Rounding;
+		}
diff --git a/libraries/gdtoa/gdtoaimp.h b/libraries/gdtoa/gdtoaimp.h
new file mode 100644
index 000000000..c63bf3135
--- /dev/null
+++ b/libraries/gdtoa/gdtoaimp.h
@@ -0,0 +1,685 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998-2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* This is a variation on dtoa.c that converts arbitrary binary
+   floating-point formats to and from decimal notation.  It uses
+   double-precision arithmetic internally, so there are still
+   various #ifdefs that adapt the calculations to the native
+   double-precision arithmetic (any of IEEE, VAX D_floating,
+   or IBM mainframe arithmetic).
+
+   Please send bug reports to David M. Gay (dmg at acm dot org,
+   with " at " changed to "@" and " dot " changed to ".").
+ */
+
+/* On a machine with IEEE extended-precision registers, it is
+ * necessary to specify double-precision (53-bit) rounding precision
+ * before invoking strtod or dtoa.  If the machine uses (the equivalent
+ * of) Intel 80x87 arithmetic, the call
+ *	_control87(PC_53, MCW_PC);
+ * does this with many compilers.  Whether this or another call is
+ * appropriate depends on the compiler; for this to work, it may be
+ * necessary to #include "float.h" or another system-dependent header
+ * file.
+ */
+
+/* strtod for IEEE-, VAX-, and IBM-arithmetic machines.
+ *
+ * This strtod returns a nearest machine number to the input decimal
+ * string (or sets errno to ERANGE).  With IEEE arithmetic, ties are
+ * broken by the IEEE round-even rule.  Otherwise ties are broken by
+ * biased rounding (add half and chop).
+ *
+ * Inspired loosely by William D. Clinger's paper "How to Read Floating
+ * Point Numbers Accurately" [Proc. ACM SIGPLAN '90, pp. 112-126].
+ *
+ * Modifications:
+ *
+ *	1. We only require IEEE, IBM, or VAX double-precision
+ *		arithmetic (not IEEE double-extended).
+ *	2. We get by with floating-point arithmetic in a case that
+ *		Clinger missed -- when we're computing d * 10^n
+ *		for a small integer d and the integer n is not too
+ *		much larger than 22 (the maximum integer k for which
+ *		we can represent 10^k exactly), we may be able to
+ *		compute (d*10^k) * 10^(e-k) with just one roundoff.
+ *	3. Rather than a bit-at-a-time adjustment of the binary
+ *		result in the hard case, we use floating-point
+ *		arithmetic to determine the adjustment to within
+ *		one bit; only in really hard cases do we need to
+ *		compute a second residual.
+ *	4. Because of 3., we don't need a large table of powers of 10
+ *		for ten-to-e (just some small tables, e.g. of 10^k
+ *		for 0 <= k <= 22).
+ */
+
+/*
+ * #define IEEE_8087 for IEEE-arithmetic machines where the least
+ *	significant byte has the lowest address.
+ * #define IEEE_MC68k for IEEE-arithmetic machines where the most
+ *	significant byte has the lowest address.
+ * #define Long int on machines with 32-bit ints and 64-bit longs.
+ * #define Sudden_Underflow for IEEE-format machines without gradual
+ *	underflow (i.e., that flush to zero on underflow).
+ * #define IBM for IBM mainframe-style floating-point arithmetic.
+ * #define VAX for VAX-style floating-point arithmetic (D_floating).
+ * #define No_leftright to omit left-right logic in fast floating-point
+ *	computation of dtoa and gdtoa.  This will cause modes 4 and 5 to be
+ *	treated the same as modes 2 and 3 for some inputs.
+ * #define Check_FLT_ROUNDS if FLT_ROUNDS can assume the values 2 or 3.
+ * #define RND_PRODQUOT to use rnd_prod and rnd_quot (assembly routines
+ *	that use extended-precision instructions to compute rounded
+ *	products and quotients) with IBM.
+ * #define ROUND_BIASED for IEEE-format with biased rounding and arithmetic
+ *	that rounds toward +Infinity.
+ * #define ROUND_BIASED_without_Round_Up for IEEE-format with biased
+ *	rounding when the underlying floating-point arithmetic uses
+ *	unbiased rounding.  This prevents using ordinary floating-point
+ *	arithmetic when the result could be computed with one rounding error.
+ * #define Inaccurate_Divide for IEEE-format with correctly rounded
+ *	products but inaccurate quotients, e.g., for Intel i860.
+ * #define NO_LONG_LONG on machines that do not have a "long long"
+ *	integer type (of >= 64 bits).  On such machines, you can
+ *	#define Just_16 to store 16 bits per 32-bit Long when doing
+ *	high-precision integer arithmetic.  Whether this speeds things
+ *	up or slows things down depends on the machine and the number
+ *	being converted.  If long long is available and the name is
+ *	something other than "long long", #define Llong to be the name,
+ *	and if "unsigned Llong" does not work as an unsigned version of
+ *	Llong, #define ULLong to be the corresponding unsigned type.
+ * #define KR_headers for old-style C function headers.
+ * #define Bad_float_h if your system lacks a float.h or if it does not
+ *	define some or all of DBL_DIG, DBL_MAX_10_EXP, DBL_MAX_EXP,
+ *	FLT_RADIX, FLT_ROUNDS, and DBL_MAX.
+ * #define MALLOC your_malloc, where your_malloc(n) acts like malloc(n)
+ *	if memory is available and otherwise does something you deem
+ *	appropriate.  If MALLOC is undefined, malloc will be invoked
+ *	directly -- and assumed always to succeed.  Similarly, if you
+ *	want something other than the system's free() to be called to
+ *	recycle memory acquired from MALLOC, #define FREE to be the
+ *	name of the alternate routine.  (FREE or free is only called in
+ *	pathological cases, e.g., in a gdtoa call after a gdtoa return in
+ *	mode 3 with thousands of digits requested.)
+ * #define Omit_Private_Memory to omit logic (added Jan. 1998) for making
+ *	memory allocations from a private pool of memory when possible.
+ *	When used, the private pool is PRIVATE_MEM bytes long:  2304 bytes,
+ *	unless #defined to be a different length.  This default length
+ *	suffices to get rid of MALLOC calls except for unusual cases,
+ *	such as decimal-to-binary conversion of a very long string of
+ *	digits.  When converting IEEE double precision values, the
+ *	longest string gdtoa can return is about 751 bytes long.  For
+ *	conversions by strtod of strings of 800 digits and all gdtoa
+ *	conversions of IEEE doubles in single-threaded executions with
+ *	8-byte pointers, PRIVATE_MEM >= 7400 appears to suffice; with
+ *	4-byte pointers, PRIVATE_MEM >= 7112 appears adequate.
+ * #define NO_INFNAN_CHECK if you do not wish to have INFNAN_CHECK
+ *	#defined automatically on IEEE systems.  On such systems,
+ *	when INFNAN_CHECK is #defined, strtod checks
+ *	for Infinity and NaN (case insensitively).
+ *	When INFNAN_CHECK is #defined and No_Hex_NaN is not #defined,
+ *	strtodg also accepts (case insensitively) strings of the form
+ *	NaN(x), where x is a string of hexadecimal digits (optionally
+ *	preceded by 0x or 0X) and spaces; if there is only one string
+ *	of hexadecimal digits, it is taken for the fraction bits of the
+ *	resulting NaN; if there are two or more strings of hexadecimal
+ *	digits, each string is assigned to the next available sequence
+ *	of 32-bit words of fraction bits (starting with the most
+ *	significant), right-aligned in each sequence.
+ *	Unless GDTOA_NON_PEDANTIC_NANCHECK is #defined, input "NaN(...)"
+ *	is consumed even when ... has the wrong form (in which case the
+ *	"(...)" is consumed but ignored).
+ * #define MULTIPLE_THREADS if the system offers preemptively scheduled
+ *	multiple threads.  In this case, you must provide (or suitably
+ *	#define) two locks, acquired by ACQUIRE_DTOA_LOCK(n) and freed
+ *	by FREE_DTOA_LOCK(n) for n = 0 or 1.  (The second lock, accessed
+ *	in pow5mult, ensures lazy evaluation of only one copy of high
+ *	powers of 5; omitting this lock would introduce a small
+ *	probability of wasting memory, but would otherwise be harmless.)
+ *	You must also invoke freedtoa(s) to free the value s returned by
+ *	dtoa.  You may do so whether or not MULTIPLE_THREADS is #defined.
+ * #define IMPRECISE_INEXACT if you do not care about the setting of
+ *	the STRTOG_Inexact bits in the special case of doing IEEE double
+ *	precision conversions (which could also be done by the strtod in
+ *	dtoa.c).
+ * #define NO_HEX_FP to disable recognition of C9x's hexadecimal
+ *	floating-point constants.
+ * #define NO_ERRNO to suppress setting errno (in strtod.c and
+ *	strtodg.c).
+ * #define NO_STRING_H to use private versions of memcpy.
+ *	On some K&R systems, it may also be necessary to
+ *	#define DECLARE_SIZE_T in this case.
+ * #define USE_LOCALE to use the current locale's decimal_point value.
+ */
+
+#ifndef GDTOAIMP_H_INCLUDED
+#define GDTOAIMP_H_INCLUDED
+#include "gdtoa.h"
+
+#if defined(_MSC_VER)
+/* [RH] Generating gd_qnan.h strikes me as too cumbersome under Visual
+ * Studio, so here's the equivalent, given the limited number of
+ * architectures that MSC can target. (Itanium? Who cares about that?)
+ */
+#define f_QNAN 0xffc00000
+#define d_QNAN0 0x0
+#define d_QNAN1 0xfff80000
+#define ld_QNAN0 0x0
+#define ld_QNAN1 0xfff80000
+#define ld_QNAN2 0x0
+#define ld_QNAN3 0x0
+#define ldus_QNAN0 0x0
+#define ldus_QNAN1 0x0
+#define ldus_QNAN2 0x0
+#define ldus_QNAN3 0xfff8
+#define ldus_QNAN4 0x0
+/* [RH] Interestingly, MinGW produces something different because
+ * it turns out that it has a true long double type. I thought that
+ * all ia32 compilers had phased out extended precision.
+ */
+#elif defined(__APPLE__)
+#if defined(__x86_64__) || defined(__i386__)
+#define f_QNAN 0xffc00000
+#define d_QNAN0 0x0
+#define d_QNAN1 0xfff80000
+#define ld_QNAN0 0x0
+#define ld_QNAN1 0xc0000000
+#define ld_QNAN2 0xffff
+#define ld_QNAN3 0x0
+#define ldus_QNAN0 0x0
+#define ldus_QNAN1 0x0
+#define ldus_QNAN2 0x0
+#define ldus_QNAN3 0xc000
+#define ldus_QNAN4 0xffff
+#else
+#define f_QNAN 0xffc00000
+#define d_QNAN0 0xfff80000
+#define d_QNAN1 0x0
+#define ld_QNAN0 0xfff80000
+#define ld_QNAN1 0x0
+#define ld_QNAN2 0x0
+#define ld_QNAN3 0x0
+#define ldus_QNAN0 0xfff8
+#define ldus_QNAN1 0x0
+#define ldus_QNAN2 0x0
+#define ldus_QNAN3 0x0
+#define ldus_QNAN4 0x0
+#endif
+#else
+#include "gd_qnan.h"
+#endif
+
+#ifdef Honor_FLT_ROUNDS
+#include <fenv.h>
+#endif
+
+#ifdef DEBUG
+#include "stdio.h"
+#define Bug(x) {fprintf(stderr, "%s\n", x); exit(1);}
+#endif
+
+#include "stdlib.h"
+#include "string.h"
+
+#ifdef KR_headers
+#define Char char
+#else
+#define Char void
+#endif
+
+#ifdef MALLOC
+extern Char *MALLOC ANSI((size_t));
+#else
+#define MALLOC malloc
+#endif
+
+#undef IEEE_Arith
+#undef Avoid_Underflow
+#ifdef IEEE_MC68k
+#define IEEE_Arith
+#endif
+#ifdef IEEE_8087
+#define IEEE_Arith
+#endif
+
+#include "errno.h"
+#ifdef Bad_float_h
+
+#ifdef IEEE_Arith
+#define DBL_DIG 15
+#define DBL_MAX_10_EXP 308
+#define DBL_MAX_EXP 1024
+#define FLT_RADIX 2
+#define DBL_MAX 1.7976931348623157e+308
+#endif
+
+#ifdef IBM
+#define DBL_DIG 16
+#define DBL_MAX_10_EXP 75
+#define DBL_MAX_EXP 63
+#define FLT_RADIX 16
+#define DBL_MAX 7.2370055773322621e+75
+#endif
+
+#ifdef VAX
+#define DBL_DIG 16
+#define DBL_MAX_10_EXP 38
+#define DBL_MAX_EXP 127
+#define FLT_RADIX 2
+#define DBL_MAX 1.7014118346046923e+38
+#define n_bigtens 2
+#endif
+
+#ifndef LONG_MAX
+#define LONG_MAX 2147483647
+#endif
+
+#else /* ifndef Bad_float_h */
+#include "float.h"
+#endif /* Bad_float_h */
+
+#ifdef IEEE_Arith
+#define Scale_Bit 0x10
+#define n_bigtens 5
+#endif
+
+#ifdef IBM
+#define n_bigtens 3
+#endif
+
+#ifdef VAX
+#define n_bigtens 2
+#endif
+
+#ifndef __MATH_H__
+#include "math.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(IEEE_8087) + defined(IEEE_MC68k) + defined(VAX) + defined(IBM) != 1
+Exactly one of IEEE_8087, IEEE_MC68k, VAX, or IBM should be defined.
+#endif
+
+typedef union { double d; ULong L[2]; } U;	/* a double overlaid with two ULong words */
+
+#ifdef IEEE_8087	/* word order within the double is swapped for IEEE_8087 */
+#define word0(x) (x)->L[1]	/* word0: the word gdtoa adjusts with Exp_msk1 and masks with Frac_mask */
+#define word1(x) (x)->L[0]	/* word1: the other word of the pair */
+#else
+#define word0(x) (x)->L[0]
+#define word1(x) (x)->L[1]
+#endif
+#define dval(x) (x)->d	/* the value viewed as a double */
+
+/* The following definition of Storeinc is appropriate for MIPS processors.
+ * An alternative that might be better on some machines is
+ * #define Storeinc(a,b,c) (*a++ = b << 16 | c & 0xffff)
+ */
+#if defined(IEEE_8087) + defined(VAX)
+#define Storeinc(a,b,c) (((unsigned short *)a)[1] = (unsigned short)b, \
+((unsigned short *)a)[0] = (unsigned short)c, a++)
+#else
+#define Storeinc(a,b,c) (((unsigned short *)a)[0] = (unsigned short)b, \
+((unsigned short *)a)[1] = (unsigned short)c, a++)
+#endif
+
+/* #define P DBL_MANT_DIG */
+/* Ten_pmax = floor(P*log(2)/log(5)) */
+/* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */
+/* Quick_max = floor((P-1)*log(FLT_RADIX)/log(10) - 1) */
+/* Int_max = floor(P*log(FLT_RADIX)/log(10) - 1) */
+
+#ifdef IEEE_Arith
+#define Exp_shift  20
+#define Exp_shift1 20
+#define Exp_msk1    0x100000
+#define Exp_msk11   0x100000
+#define Exp_mask  0x7ff00000
+#define P 53
+#define Bias 1023
+#define Emin (-1022)
+#define Exp_1  0x3ff00000
+#define Exp_11 0x3ff00000
+#define Ebits 11
+#define Frac_mask  0xfffff
+#define Frac_mask1 0xfffff
+#define Ten_pmax 22
+#define Bletch 0x10
+#define Bndry_mask  0xfffff
+#define Bndry_mask1 0xfffff
+#define LSB 1
+#define Sign_bit 0x80000000
+#define Log2P 1
+#define Tiny0 0
+#define Tiny1 1
+#define Quick_max 14
+#define Int_max 14
+
+#ifndef Flt_Rounds
+#ifdef FLT_ROUNDS
+#define Flt_Rounds FLT_ROUNDS
+#else
+#define Flt_Rounds 1
+#endif
+#endif /*Flt_Rounds*/
+
+#else /* ifndef IEEE_Arith */
+#undef  Sudden_Underflow
+#define Sudden_Underflow
+#ifdef IBM
+#undef Flt_Rounds
+#define Flt_Rounds 0
+#define Exp_shift  24
+#define Exp_shift1 24
+#define Exp_msk1   0x1000000
+#define Exp_msk11  0x1000000
+#define Exp_mask  0x7f000000
+#define P 14
+#define Bias 65
+#define Exp_1  0x41000000
+#define Exp_11 0x41000000
+#define Ebits 8	/* exponent has 7 bits, but 8 is the right value in b2d */
+#define Frac_mask  0xffffff
+#define Frac_mask1 0xffffff
+#define Bletch 4
+#define Ten_pmax 22
+#define Bndry_mask  0xefffff
+#define Bndry_mask1 0xffffff
+#define LSB 1
+#define Sign_bit 0x80000000
+#define Log2P 4
+#define Tiny0 0x100000
+#define Tiny1 0
+#define Quick_max 14
+#define Int_max 15
+#else /* VAX */
+#undef Flt_Rounds
+#define Flt_Rounds 1
+#define Exp_shift  23
+#define Exp_shift1 7
+#define Exp_msk1    0x80
+#define Exp_msk11   0x800000
+#define Exp_mask  0x7f80
+#define P 56
+#define Bias 129
+#define Exp_1  0x40800000
+#define Exp_11 0x4080
+#define Ebits 8
+#define Frac_mask  0x7fffff
+#define Frac_mask1 0xffff007f
+#define Ten_pmax 24
+#define Bletch 2
+#define Bndry_mask  0xffff007f
+#define Bndry_mask1 0xffff007f
+#define LSB 0x10000
+#define Sign_bit 0x8000
+#define Log2P 1
+#define Tiny0 0x80
+#define Tiny1 0
+#define Quick_max 15
+#define Int_max 15
+#endif /* IBM, VAX */
+#endif /* IEEE_Arith */
+
+#ifndef IEEE_Arith
+#define ROUND_BIASED	/* always defined when IEEE_Arith is not */
+#else
+#ifdef ROUND_BIASED_without_Round_Up
+#undef  ROUND_BIASED
+#define ROUND_BIASED	/* ROUND_BIASED_without_Round_Up implies ROUND_BIASED */
+#endif
+#endif
+
+#ifdef RND_PRODQUOT
+#define rounded_product(a,b) a = rnd_prod(a, b)	/* delegate to externally supplied helpers */
+#define rounded_quotient(a,b) a = rnd_quot(a, b)
+#ifdef KR_headers
+extern double rnd_prod(), rnd_quot();
+#else
+extern double rnd_prod(double, double), rnd_quot(double, double);
+#endif
+#else
+#define rounded_product(a,b) a *= b	/* default: plain in-place multiply */
+#define rounded_quotient(a,b) a /= b	/* default: plain in-place divide */
+#endif
+
+#define Big0 (Frac_mask1 | Exp_msk1*(DBL_MAX_EXP+Bias-1))
+#define Big1 0xffffffff
+
+#undef  Pack_16
+#ifndef Pack_32
+#define Pack_32	/* default: 32 bits of data per ULong word */
+#endif
+
+#ifdef NO_LONG_LONG
+#undef ULLong
+#ifdef Just_16
+#undef Pack_32
+#define Pack_16
+/* When Pack_32 is not defined, we store 16 bits per 32-bit Long.
+ * This makes some inner loops simpler and sometimes saves work
+ * during multiplications, but it often seems to make things slightly
+ * slower.  Hence the default is now to store 32 bits per Long.
+ */
+#endif
+#else	/* long long available */
+#ifndef Llong
+#define Llong long long
+#endif
+#ifndef ULLong
+#define ULLong unsigned Llong	/* when defined, the Bigint routines use it as a double-width accumulator */
+#endif
+#endif /* NO_LONG_LONG */
+
+#ifdef Pack_32
+#define ULbits 32	/* data bits held in each Bigint word */
+#define kshift 5	/* log2(ULbits): bit index >> kshift gives the word index */
+#define kmask 31	/* ULbits-1: bit index & kmask gives the bit within the word */
+#define ALL_ON 0xffffffff	/* a word with every data bit set */
+#else
+#define ULbits 16	/* data bits held in each Bigint word */
+#define kshift 4	/* log2(ULbits) */
+#define kmask 15	/* ULbits-1 */
+#define ALL_ON 0xffff	/* a word with every data bit set */
+#endif
+
+//#ifndef MULTIPLE_THREADS
+#define ACQUIRE_DTOA_LOCK(n)	/*nothing: the MULTIPLE_THREADS guard is commented out, so locking is always compiled away*/
+#define FREE_DTOA_LOCK(n)	/*nothing*/
+//#endif
+
+#define Kmax 9	/* largest size class k kept on the Balloc free lists */
+
+ struct
+Bigint {	/* arbitrary-precision unsigned magnitude, least significant word first */
+	struct Bigint *next;	/* link used by the Balloc/Bfree free lists and by pow5mult's cached powers */
+	int k, maxwds, sign, wds;	/* k: size class; maxwds: capacity (1 << k words); sign: set by diff() when a < b; wds: words in use */
+	ULong x[1];	/* first of maxwds data words; Balloc allocates the rest directly after the struct */
+	};
+
+ typedef struct Bigint Bigint;
+
+#ifdef NO_STRING_H
+#ifdef DECLARE_SIZE_T
+typedef unsigned int size_t;
+#endif
+extern void memcpy_D2A ANSI((void*, const void*, size_t));	/* replacement used when <string.h> is unavailable */
+#define Bcopy(x,y) memcpy_D2A(&x->sign,&y->sign,y->wds*sizeof(ULong) + 2*sizeof(int))	/* copies sign, wds and y->wds data words; next, k and maxwds of x are left alone */
+#else /* !NO_STRING_H */
+#define Bcopy(x,y) memcpy(&x->sign,&y->sign,y->wds*sizeof(ULong) + 2*sizeof(int))	/* copies sign, wds and y->wds data words; next, k and maxwds of x are left alone */
+#endif /* NO_STRING_H */
+
+#define Balloc Balloc_D2A	/* from here on: map each short internal name to a _D2A-suffixed external symbol */
+#define Bfree Bfree_D2A
+#define InfName InfName_D2A
+#define NanName NanName_D2A
+#define ULtoQ ULtoQ_D2A
+#define ULtof ULtof_D2A
+#define ULtod ULtod_D2A
+#define ULtodd ULtodd_D2A
+#define ULtox ULtox_D2A
+#define ULtoxL ULtoxL_D2A
+#define add_nanbits add_nanbits_D2A
+#define any_on any_on_D2A
+#define b2d b2d_D2A
+#define bigtens bigtens_D2A
+#define cmp cmp_D2A
+#define copybits copybits_D2A
+#define d2b d2b_D2A
+#define decrement decrement_D2A
+#define diff diff_D2A
+#define dtoa_result dtoa_result_D2A
+#define g__fmt g__fmt_D2A
+#define gethex gethex_D2A
+#define hexdig hexdig_D2A
+#define hexnan hexnan_D2A
+#define hi0bits(x) hi0bits_D2A((ULong)(x))	/* function-like: also casts the argument to ULong */
+#define i2b i2b_D2A
+#define increment increment_D2A
+#define lo0bits lo0bits_D2A
+#define lshift lshift_D2A
+#define match match_D2A
+#define mult mult_D2A
+#define multadd multadd_D2A
+#define nrv_alloc nrv_alloc_D2A
+#define pow5mult pow5mult_D2A
+#define quorem quorem_D2A
+#define ratio ratio_D2A
+#define rshift rshift_D2A
+#define rv_alloc rv_alloc_D2A
+#define s2b s2b_D2A
+#define set_ones set_ones_D2A
+#define strcp strcp_D2A
+#define strtoIg strtoIg_D2A
+#define sum sum_D2A
+#define tens tens_D2A
+#define tinytens tinytens_D2A
+#define tinytens tinytens_D2A	/* duplicate of the previous line; an identical redefinition is harmless */
+#define trailz trailz_D2A
+#define ulp ulp_D2A
+
+ extern char *add_nanbits ANSI((char*, size_t, ULong*, int));	/* shared data and helpers; the names below go through the _D2A macros above */
+ extern char *dtoa_result;
+ extern CONST double bigtens[], tens[], tinytens[];
+ extern unsigned char hexdig[];	/* hex-digit classification table, defined in hd_init.c */
+ extern const char *InfName[6], *NanName[3];
+
+ extern Bigint *Balloc ANSI((int));	/* allocate a Bigint of size class k (capacity 1 << k words) */
+ extern void Bfree ANSI((Bigint*));	/* release a Bigint obtained from Balloc */
+ extern void ULtof ANSI((ULong*, ULong*, Long, int));
+ extern void ULtod ANSI((ULong*, ULong*, Long, int));
+ extern void ULtodd ANSI((ULong*, ULong*, Long, int));
+ extern void ULtoQ ANSI((ULong*, ULong*, Long, int));
+ extern void ULtox ANSI((UShort*, ULong*, Long, int));
+ extern void ULtoxL ANSI((ULong*, ULong*, Long, int));
+ extern ULong any_on ANSI((Bigint*, int));
+ extern double b2d ANSI((Bigint*, int*));
+ extern int cmp ANSI((Bigint*, Bigint*));	/* three-way magnitude comparison */
+ extern void copybits ANSI((ULong*, int, Bigint*));
+ extern Bigint *d2b ANSI((double, int*, int*));
+ extern void decrement ANSI((Bigint*));
+ extern Bigint *diff ANSI((Bigint*, Bigint*));	/* |a - b|, with sign set when a < b */
+ extern char *dtoa ANSI((double d, int mode, int ndigits,
+			int *decpt, int *sign, char **rve));
+ extern char *g__fmt ANSI((char*, char*, char*, int, ULong, size_t));
+ extern int gethex ANSI((CONST char**, FPI*, Long*, Bigint**, int));
+ extern void hexdig_init_D2A(Void);	/* its definition in hd_init.c is under #if 0 */
+ extern int hexnan ANSI((CONST char**, FPI*, ULong*));
+ extern int hi0bits_D2A ANSI((ULong));	/* count of leading zero bits */
+ extern Bigint *i2b ANSI((int));
+ extern Bigint *increment ANSI((Bigint*));
+ extern int lo0bits ANSI((ULong*));	/* count of trailing zero bits; may shift *y */
+ extern Bigint *lshift ANSI((Bigint*, int));
+ extern int match ANSI((CONST char**, char*));
+ extern Bigint *mult ANSI((Bigint*, Bigint*));
+ extern Bigint *multadd ANSI((Bigint*, int, int));
+ extern char *nrv_alloc ANSI((char*, char **, int));
+ extern Bigint *pow5mult ANSI((Bigint*, int));
+ extern int quorem ANSI((Bigint*, Bigint*));
+ extern double ratio ANSI((Bigint*, Bigint*));
+ extern void rshift ANSI((Bigint*, int));
+ extern char *rv_alloc ANSI((int));
+ extern Bigint *s2b ANSI((CONST char*, int, int, ULong, int));
+ extern Bigint *set_ones ANSI((Bigint*, int));
+ extern char *strcp ANSI((char*, const char*));
+ extern int strtoIg ANSI((CONST char*, char**, FPI*, Long*, Bigint**, int*));
+// extern double strtod ANSI((const char *s00, char **se));
+ extern Bigint *sum ANSI((Bigint*, Bigint*));
+ extern int trailz ANSI((Bigint*));	/* count of trailing zero bits of a Bigint */
+ extern double ulp ANSI((U*));
+
+#ifdef __cplusplus
+}
+#endif
+/*
+ * NAN_WORD0 and NAN_WORD1 are only referenced in strtod.c.  Prior to
+ * 20050115, they used to be hard-wired here (to 0x7ff80000 and 0,
+ * respectively), but now are determined by compiling and running
+ * qnan.c to generate gd_qnan.h, which specifies d_QNAN0 and d_QNAN1.
+ * Formerly gdtoaimp.h recommended supplying suitable -DNAN_WORD0=...
+ * and -DNAN_WORD1=...  values if necessary.  This should still work.
+ * (On HP Series 700/800 machines, -DNAN_WORD0=0x7ff40000 works.)
+ */
+#ifdef IEEE_Arith
+#ifndef NO_INFNAN_CHECK
+#undef INFNAN_CHECK
+#define INFNAN_CHECK	/* on by default for IEEE arithmetic unless NO_INFNAN_CHECK is defined */
+#endif
+#ifdef IEEE_MC68k
+#define _0 0	/* NOTE(review): _0/_1 look like the indices of the high/low 32-bit words of a double -- confirm against word0/word1 */
+#define _1 1
+#ifndef NAN_WORD0
+#define NAN_WORD0 d_QNAN0	/* d_QNAN0/d_QNAN1 come from the generated gd_qnan.h (see comment above) */
+#endif
+#ifndef NAN_WORD1
+#define NAN_WORD1 d_QNAN1
+#endif
+#else
+#define _0 1	/* word order is swapped relative to the IEEE_MC68k case */
+#define _1 0
+#ifndef NAN_WORD0
+#define NAN_WORD0 d_QNAN1	/* hence the QNAN words are swapped too */
+#endif
+#ifndef NAN_WORD1
+#define NAN_WORD1 d_QNAN0
+#endif
+#endif
+#else
+#undef INFNAN_CHECK	/* never check for Inf/NaN text without IEEE arithmetic */
+#endif
+
+#undef SI
+#ifdef Sudden_Underflow
+#define SI 1	/* SI is 1 exactly when Sudden_Underflow is defined */
+#else
+#define SI 0
+#endif
+
+#endif /* GDTOAIMP_H_INCLUDED */
diff --git a/libraries/gdtoa/gethex.c b/libraries/gdtoa/gethex.c
new file mode 100644
index 000000000..72da9d326
--- /dev/null
+++ b/libraries/gdtoa/gethex.c
@@ -0,0 +1,349 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed at "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+#ifdef USE_LOCALE
+#include "locale.h"
+#endif
+
+ int
+#ifdef KR_headers
+gethex(sp, fpi, exp, bp, sign)
+	CONST char **sp; FPI *fpi; Long *exp; Bigint **bp; int sign;
+#else
+gethex( CONST char **sp, FPI *fpi, Long *exp, Bigint **bp, int sign)
+#endif
+{	/* Parse hex digits [.digits][p[+-]decimal-exponent] starting two characters past *sp; store the rounded fpi->nbits-bit significand in *bp and its binary exponent in *exp, advance *sp, and return STRTOG_* flags (errno = ERANGE on overflow/underflow unless NO_ERRNO). sign is nonzero for a negative value and only affects directed rounding. */
+	Bigint *b;
+	CONST unsigned char *decpt, *s0, *s, *s1;
+	int big, esign, havedig, irv, j, k, n, n0, nbits, up, zret;
+	ULong L, lostbits, *x;
+	Long e, e1;
+#ifdef USE_LOCALE
+	int i;
+#ifdef NO_LOCALE_CACHE
+	const unsigned char *decimalpoint = (unsigned char*)localeconv()->decimal_point;
+#else
+	const unsigned char *decimalpoint;
+	static unsigned char *decimalpoint_cache;	/* copy of the locale's decimal point, made on first use */
+	if (!(s0 = decimalpoint_cache)) {
+		s0 = (unsigned char*)localeconv()->decimal_point;
+		if ((decimalpoint_cache = (unsigned char*)MALLOC(strlen((const char*)s0) + 1))) {	/* casts keep char/unsigned char pointers consistent */
+			strcpy((char*)decimalpoint_cache, (const char*)s0);
+			s0 = decimalpoint_cache;
+			}
+		}
+	decimalpoint = s0;
+#endif
+#endif
+
+	/**** if (!hexdig['0']) hexdig_init_D2A(); ****/
+	*bp = 0;
+	havedig = 0;
+	s0 = *(CONST unsigned char **)sp + 2;	/* skip the first two characters (presumably the "0x" prefix -- confirm against callers) */
+	while(s0[havedig] == '0')	/* leading zeros count as digits seen but carry no value */
+		havedig++;
+	s0 += havedig;
+	s = s0;
+	decpt = 0;
+	zret = 0;
+	e = 0;
+	if (hexdig[*s])
+		havedig++;
+	else {
+		zret = 1;	/* no nonzero integer digit: the value is zero unless a fraction digit says otherwise */
+#ifdef USE_LOCALE
+		for(i = 0; decimalpoint[i]; ++i) {
+			if (s[i] != decimalpoint[i])
+				goto pcheck;
+			}
+		decpt = s += i;
+#else
+		if (*s != '.')
+			goto pcheck;
+		decpt = ++s;
+#endif
+		if (!hexdig[*s])
+			goto pcheck;
+		while(*s == '0')
+			s++;
+		if (hexdig[*s])
+			zret = 0;
+		havedig = 1;
+		s0 = s;	/* significant digits start after the fraction's leading zeros */
+		}
+	while(hexdig[*s])
+		s++;
+#ifdef USE_LOCALE
+	if (*s == *decimalpoint && !decpt) {
+		for(i = 1; decimalpoint[i]; ++i) {
+			if (s[i] != decimalpoint[i])
+				goto pcheck;
+			}
+		decpt = s += i;
+#else
+	if (*s == '.' && !decpt) {
+		decpt = ++s;
+#endif
+		while(hexdig[*s])
+			s++;
+		}/*}*/
+	if (decpt)
+		e = -(((Long)(s-decpt)) << 2);	/* each fraction digit lowers the binary exponent by 4 */
+ pcheck:
+	s1 = s;	/* one past the last mantissa character */
+	big = esign = 0;
+	switch(*s) {
+	  case 'p':
+	  case 'P':
+		switch(*++s) {
+		  case '-':
+			esign = 1;
+			/* no break */
+		  case '+':
+			s++;
+		  }
+		if ((n = hexdig[*s]) == 0 || n > 0x19) {	/* exponent needs a decimal digit: hexdig[] maps '0'-'9' to 0x10-0x19 */
+			s = s1;	/* not an exponent after all: leave the 'p' unconsumed */
+			break;
+			}
+		e1 = n - 0x10;
+		while((n = hexdig[*++s]) !=0 && n <= 0x19) {
+			if (e1 & 0xf8000000)
+				big = 1;	/* magnitude already out of range; stop accumulating so e1 cannot overflow (it is unused once big is set) */
+			else e1 = 10*e1 + n - 0x10;
+			}
+		if (esign)
+			e1 = -e1;
+		e += e1;
+	  }
+	*sp = (char*)s;
+	if (!havedig)
+		*sp = (char*)s0 - 1;
+	if (zret)
+		return STRTOG_Zero;
+	if (big) {	/* exponent too large to represent: decide the result from sign and rounding mode alone */
+		if (esign) {
+			switch(fpi->rounding) {
+			  case FPI_Round_up:
+				if (sign)
+					break;
+				goto ret_tiny;
+			  case FPI_Round_down:
+				if (!sign)
+					break;
+				goto ret_tiny;
+			  }
+			goto retz;
+ ret_tiny:
+			b = Balloc(0);	/* smallest denormal: a single low-order bit */
+			b->wds = 1;
+			b->x[0] = 1;
+			goto dret;
+			}
+		switch(fpi->rounding) {
+		  case FPI_Round_near:
+			goto ovfl1;
+		  case FPI_Round_up:
+			if (!sign)
+				goto ovfl1;
+			goto ret_big;
+		  case FPI_Round_down:
+			if (sign)
+				goto ovfl1;
+			goto ret_big;
+		  }
+ ret_big:	/* rounding toward zero on overflow: return the largest finite value */
+		nbits = fpi->nbits;
+		n0 = n = nbits >> kshift;	/* n0 full words ... */
+		if (nbits & kmask)
+			++n;	/* ... plus one partial word if nbits is not a multiple of ULbits */
+		for(j = n, k = 0; j >>= 1; ++k);
+		*bp = b = Balloc(k);
+		b->wds = n;
+		for(j = 0; j < n0; ++j)
+			b->x[j] = ALL_ON;
+		if (n > n0)
+			b->x[j] = ALL_ON >> (ULbits - (nbits & kmask));	/* fix: set the low (nbits & kmask) bits; the old "ULbits >>" left the top word wrong */
+		*exp = fpi->emax;	/* fix: the largest finite value carries the maximum exponent, not emin */
+		return STRTOG_Normal | STRTOG_Inexlo;
+		}
+	n = s1 - s0 - 1;
+	for(k = 0; n > (1 << (kshift-2)) - 1; n >>= 1)	/* pick a size class big enough for the digits (ULbits/4 hex digits per word) */
+		k++;
+	b = Balloc(k);
+	x = b->x;
+	n = 0;
+	L = 0;
+#ifdef USE_LOCALE
+	for(i = 0; decimalpoint[i+1]; ++i);
+#endif
+	while(s1 > s0) {	/* pack the digits from least to most significant, 4 bits each */
+#ifdef USE_LOCALE
+		if (*--s1 == decimalpoint[i]) {
+			s1 -= i;
+			continue;
+			}
+#else
+		if (*--s1 == '.')
+			continue;
+#endif
+		if (n == ULbits) {
+			*x++ = L;
+			L = 0;
+			n = 0;
+			}
+		L |= (hexdig[*s1] & 0x0f) << n;	/* the low nibble of hexdig[] is the digit's value */
+		n += 4;
+		}
+	*x++ = L;
+	b->wds = n = x - b->x;
+	n = ULbits*n - hi0bits(L);	/* number of significant bits collected */
+	nbits = fpi->nbits;
+	lostbits = 0;
+	x = b->x;
+	if (n > nbits) {	/* too many bits: shift right, recording what is lost for rounding */
+		n -= nbits;
+		if (any_on(b,n)) {
+			lostbits = 1;
+			k = n - 1;
+			if (x[k>>kshift] & 1 << (k & kmask)) {
+				lostbits = 2;
+				if (k > 0 && any_on(b,k))
+					lostbits = 3;
+				}
+			}
+		rshift(b, n);
+		e += n;
+		}
+	else if (n < nbits) {	/* too few bits: normalize by shifting left */
+		n = nbits - n;
+		b = lshift(b, n);
+		e -= n;
+		x = b->x;
+		}
+	if (e > fpi->emax) {
+ ovfl:
+		Bfree(b);
+ ovfl1:
+#ifndef NO_ERRNO
+		errno = ERANGE;
+#endif
+		return STRTOG_Infinite | STRTOG_Overflow | STRTOG_Inexhi;
+		}
+	irv = STRTOG_Normal;
+	if (e < fpi->emin) {	/* below the normal range: denormalize or underflow */
+		irv = STRTOG_Denormal;
+		n = fpi->emin - e;
+		if (n >= nbits) {	/* every significand bit would be shifted out */
+			switch (fpi->rounding) {
+			  case FPI_Round_near:
+				if (n == nbits && (n < 2 || any_on(b,n-1)))
+					goto one_bit;
+				break;
+			  case FPI_Round_up:
+				if (!sign)
+					goto one_bit;
+				break;
+			  case FPI_Round_down:
+				if (sign) {
+ one_bit:
+					x[0] = b->wds = 1;
+ dret:
+					*bp = b;
+					*exp = fpi->emin;
+#ifndef NO_ERRNO
+					errno = ERANGE;
+#endif
+					return STRTOG_Denormal | STRTOG_Inexhi
+						| STRTOG_Underflow;
+					}
+			  }
+			Bfree(b);
+ retz:
+#ifndef NO_ERRNO
+			errno = ERANGE;
+#endif
+			return STRTOG_Zero | STRTOG_Inexlo | STRTOG_Underflow;
+			}
+		k = n - 1;
+		if (lostbits)
+			lostbits = 1;
+		else if (k > 0)
+			lostbits = any_on(b,k);
+		if (x[k>>kshift] & 1 << (k & kmask))
+			lostbits |= 2;
+		nbits -= n;
+		rshift(b,n);
+		e = fpi->emin;
+		}
+	if (lostbits) {	/* lostbits: 2 = the first dropped bit was set, 1 = some lower dropped bit was set */
+		up = 0;
+		switch(fpi->rounding) {
+		  case FPI_Round_zero:
+			break;
+		  case FPI_Round_near:
+			if (lostbits & 2
+			 && (lostbits | x[0]) & 1)	/* above half, or exactly half with an odd low bit (ties to even) */
+				up = 1;
+			break;
+		  case FPI_Round_up:
+			up = 1 - sign;
+			break;
+		  case FPI_Round_down:
+			up = sign;
+		  }
+		if (up) {
+			k = b->wds;
+			b = increment(b);
+			x = b->x;
+			if (irv == STRTOG_Denormal) {
+				if (nbits == fpi->nbits - 1
+				 && x[nbits >> kshift] & 1 << (nbits & kmask))
+					irv =  STRTOG_Normal;	/* rounding carried into the top bit: now normal */
+				}
+			else if (b->wds > k
+			 || ((n = nbits & kmask) !=0
+			      && hi0bits(x[k-1]) < 32-n)) {	/* carry overflowed nbits: renormalize */
+				rshift(b,1);
+				if (++e > fpi->emax)
+					goto ovfl;
+				}
+			irv |= STRTOG_Inexhi;
+			}
+		else
+			irv |= STRTOG_Inexlo;
+		}
+	*bp = b;
+	*exp = e;
+	return irv;
+	}
diff --git a/libraries/gdtoa/gmisc.c b/libraries/gdtoa/gmisc.c
new file mode 100644
index 000000000..8270ef944
--- /dev/null
+++ b/libraries/gdtoa/gmisc.c
@@ -0,0 +1,86 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed at "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ void
+#ifdef KR_headers
+rshift(b, k) Bigint *b; int k;
+#else
+rshift(Bigint *b, int k)
+#endif
+{	/* Shift b right by k bits in place; bits shifted out are discarded and b->wds is updated. */
+	ULong *x, *x1, *xe, y;
+	int n;
+
+	x = x1 = b->x;	/* x reads, x1 writes */
+	n = k >> kshift;	/* whole words to drop */
+	if (n < b->wds) {
+		xe = x + b->wds;
+		x += n;
+		if (k &= kmask) {	/* remaining sub-word shift, if any */
+			n = ULbits - k;
+			y = *x++ >> k;
+			while(x < xe) {
+				*x1++ = (y | (*x << n)) & ALL_ON;	/* low part of this word | high part of the next */
+				y = *x++ >> k;
+				}
+			if ((*x1 = y) !=0)	/* the top word counts only if it is nonzero */
+				x1++;
+			}
+		else
+			while(x < xe)
+				*x1++ = *x++;
+		}
+	if ((b->wds = x1 - b->x) == 0)	/* everything was shifted out: leave x[0] == 0 with wds == 0 */
+		b->x[0] = 0;
+	}
+
+ int
+#ifdef KR_headers
+trailz(b) Bigint *b;
+#else
+trailz(Bigint *b)
+#endif
+{	/* Count the zero bits below the lowest set bit of b (ULbits per word if every word is zero). */
+	ULong *wp = b->x, *end = wp + b->wds, low;
+	int zeros = 0;
+
+	while (wp < end && *wp == 0) {	/* whole zero words */
+		zeros += ULbits;
+		++wp;
+		}
+	if (wp == end)	/* no nonzero word at all */
+		return zeros;
+	low = *wp;	/* lo0bits may modify its argument, so hand it a copy */
+	zeros += lo0bits(&low);
+	return zeros;
+	}
diff --git a/libraries/gdtoa/hd_init.c b/libraries/gdtoa/hd_init.c
new file mode 100644
index 000000000..d79ae2ec8
--- /dev/null
+++ b/libraries/gdtoa/hd_init.c
@@ -0,0 +1,77 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed at "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+#if 0
+ unsigned char hexdig[256];
+
+ static void
+#ifdef KR_headers
+htinit(h, s, inc) unsigned char *h; unsigned char *s; int inc;
+#else
+htinit(unsigned char *h, unsigned char *s, int inc)
+#endif
+{	/* For each character s[i], set h[s[i]] = i + inc. */
+	int i, j;
+	for(i = 0; (j = s[i]) !=0; i++)
+		h[j] = i + inc;
+	}
+
+ void
+hexdig_init_D2A(Void)	/* Use of hexdig_init omitted 20121220 to avoid a */
+			/* race condition when multiple threads are used. */
+{
+#define USC (unsigned char *)
+	htinit(hexdig, USC "0123456789", 0x10);
+	htinit(hexdig, USC "abcdef", 0x10 + 10);
+	htinit(hexdig, USC "ABCDEF", 0x10 + 10);
+	}
+#else
+ unsigned char hexdig[256] = {	/* 0 for non-hex characters, otherwise 0x10 + digit value; same contents the disabled initializer would produce for ASCII */
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	16,17,18,19,20,21,22,23,24,25,0,0,0,0,0,0,	/* 0x30-0x3f: '0'-'9' */
+	0,26,27,28,29,30,31,0,0,0,0,0,0,0,0,0,	/* 0x40-0x4f: 'A'-'F' */
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	0,26,27,28,29,30,31,0,0,0,0,0,0,0,0,0,	/* 0x60-0x6f: 'a'-'f' */
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+	};
+#endif
diff --git a/libraries/gdtoa/hexnan.c b/libraries/gdtoa/hexnan.c
new file mode 100644
index 000000000..80721e97a
--- /dev/null
+++ b/libraries/gdtoa/hexnan.c
@@ -0,0 +1,159 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed at "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ static void
+#ifdef KR_headers
+L_shift(x, x1, i) ULong *x; ULong *x1; int i;
+#else
+L_shift(ULong *x, ULong *x1, int i)
+#endif
+{	/* Shift the words x[1] .. x1[0] right by 4*(8-i) bits, moving the bits that fall out into the word below. */
+	int down = (8 - i) << 2;	/* 4 bits for each hex digit missing from a full group of 8 */
+	int fill = ULbits - down;	/* complementary left shift into the lower word */
+	ULong *p;
+
+	p = x;
+	do {	/* runs at least once, even when x >= x1 */
+		p[0] |= p[1] << fill;
+		p[1] >>= down;
+		} while(++p < x1);
+	}
+
+ int
+#ifdef KR_headers
+hexnan(sp, fpi, x0)
+	CONST char **sp; FPI *fpi; ULong *x0;
+#else
+hexnan( CONST char **sp, FPI *fpi, ULong *x0)
+#endif
+{	/* Parse hex digits following **sp into the fpi->nbits-bit word array x0; returns STRTOG_NaNbits when digits were stored, STRTOG_NaN otherwise. *sp is advanced past a closing ')' when one is found. */
+	ULong c, h, *x, *x1, *xe;
+	CONST char *s;
+	int havedig, hd0, i, nbits;
+
+	/**** if (!hexdig['0']) hexdig_init_D2A(); ****/
+	nbits = fpi->nbits;
+	x = x0 + (nbits >> kshift);
+	if (nbits & kmask)
+		x++;
+	*--x = 0;	/* x starts at the highest word of the result and moves down as groups fill */
+	x1 = xe = x;
+	havedig = hd0 = i = 0;	/* i counts hex digits in the current word (at most 8) */
+	s = *sp;	/* scanning looks at s+1 onward; presumably *sp points at the opening '(' -- confirm against callers */
+	/* allow optional initial 0x or 0X */
+	while((c = *(CONST unsigned char*)(s+1)) && c <= ' ') {	/* skip leading blanks/control characters */
+		if (!c)	/* cannot be true here: the loop condition already requires c != 0 */
+			goto retnan;
+		++s;
+		}
+	if (s[1] == '0' && (s[2] == 'x' || s[2] == 'X')
+	 && *(CONST unsigned char*)(s+3) > ' ')
+		s += 2;
+	while((c = *(CONST unsigned char*)++s)) {
+		if (!(h = hexdig[c])) {	/* not a hex digit */
+			if (c <= ' ') {	/* whitespace separates groups of digits */
+				if (hd0 < havedig) {	/* digits were seen since the last separator */
+					if (x < x1 && i < 8)
+						L_shift(x, x1, i);
+					if (x <= x0) {	/* no room left for another word */
+						i = 8;
+						continue;
+						}
+					hd0 = havedig;
+					*--x = 0;
+					x1 = x;
+					i = 0;
+					}
+				while((c = *(CONST unsigned char*)(s+1)) <= ' ') {
+					if (!c)	/* string ended before ')' */
+						goto retnan;
+					++s;
+					}
+				if (s[1] == '0' && (s[2] == 'x' || s[2] == 'X')
+				 && *(CONST unsigned char*)(s+3) > ' ')
+					s += 2;
+				continue;
+				}
+			if (/*(*/ c == ')' && havedig) {	/* normal end of the digit list */
+				*sp = s + 1;
+				break;
+				}
+#ifndef GDTOA_NON_PEDANTIC_NANCHECK
+			do {	/* unexpected character: look ahead for ')' and, if found, accept what was parsed so far */
+				if (/*(*/ c == ')') {
+					*sp = s + 1;
+					goto break2;
+					}
+				} while((c = *++s));
+#endif
+ retnan:
+			return STRTOG_NaN;
+			}
+		havedig++;
+		if (++i > 8) {	/* current word is full */
+			if (x <= x0)	/* out of words: extra digits are dropped */
+				continue;
+			i = 1;
+			*--x = 0;
+			}
+		*x = (*x << 4) | (h & 0xf);	/* low nibble of hexdig[] is the digit value */
+		}
+#ifndef GDTOA_NON_PEDANTIC_NANCHECK
+ break2:
+#endif
+	if (!havedig)
+		return STRTOG_NaN;
+	if (x < x1 && i < 8)
+		L_shift(x, x1, i);
+	if (x > x0) {	/* result did not reach x0: slide it down and zero the vacated high words */
+		x1 = x0;
+		do *x1++ = *x++;
+			while(x <= xe);
+		do *x1++ = 0;
+			while(x1 <= xe);
+		}
+	else {
+		/* truncate high-order word if necessary */
+		if ( (i = nbits & (ULbits-1)) !=0)
+			*xe &= ((ULong)0xffffffff) >> (ULbits - i);
+		}
+	for(x1 = xe;; --x1) {	/* make sure the result is not all zero bits */
+		if (*x1 != 0)
+			break;
+		if (x1 == x0) {
+			*x1 = 1;
+			break;
+			}
+		}
+	return STRTOG_NaNbits;
+	}
diff --git a/libraries/gdtoa/misc.c b/libraries/gdtoa/misc.c
new file mode 100644
index 000000000..d13046732
--- /dev/null
+++ b/libraries/gdtoa/misc.c
@@ -0,0 +1,875 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 1999 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed at "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ static Bigint *freelist[Kmax+1];	/* freelist[k]: singly linked list (via ->next) of released Bigints of size class k */
+#ifndef Omit_Private_Memory
+#ifndef PRIVATE_MEM
+#define PRIVATE_MEM 2304	/* size in bytes of the static arena Balloc carves from before falling back to MALLOC */
+#endif
+#define PRIVATE_mem ((PRIVATE_MEM+sizeof(double)-1)/sizeof(double))	/* the same size in doubles, rounded up */
+static double private_mem[PRIVATE_mem], *pmem_next = private_mem;	/* arena and its next-free pointer */
+#endif
+
+ Bigint *
+Balloc
+#ifdef KR_headers
+	(k) int k;
+#else
+	(int k)
+#endif
+{	/* Return a Bigint with room for 1 << k words, with sign and wds cleared; the data words are not initialized. */
+	int x;
+	Bigint *rv;
+#ifndef Omit_Private_Memory
+	unsigned int len;
+#endif
+
+	ACQUIRE_DTOA_LOCK(0);
+	/* The k > Kmax case does not need ACQUIRE_DTOA_LOCK(0), */
+	/* but this case seems very unlikely. */
+	if (k <= Kmax && (rv = freelist[k]) !=0) {	/* reuse a previously freed block of this size class */
+		freelist[k] = rv->next;
+		}
+	else {
+		x = 1 << k;
+#ifdef Omit_Private_Memory
+		rv = (Bigint *)MALLOC(sizeof(Bigint) + (x-1)*sizeof(ULong));
+#else
+		len = (sizeof(Bigint) + (x-1)*sizeof(ULong) + sizeof(double) - 1)
+			/sizeof(double);	/* size in doubles, rounded up */
+		if (k <= Kmax && pmem_next - private_mem + len <= PRIVATE_mem) {	/* fits in the static arena */
+			rv = (Bigint*)pmem_next;
+			pmem_next += len;
+			}
+		else
+			rv = (Bigint*)MALLOC(len*sizeof(double));	/* NOTE(review): the result is not checked for NULL before use */
+#endif
+		rv->k = k;
+		rv->maxwds = x;
+		}
+	FREE_DTOA_LOCK(0);
+	rv->sign = rv->wds = 0;
+	return rv;
+	}
+
+ void
+Bfree
+#ifdef KR_headers
+	(v) Bigint *v;
+#else
+	(Bigint *v)
+#endif
+{	/* Release v (a null pointer is ignored): size classes up to Kmax go back on the free list, larger ones are freed. */
+	if (!v)
+		return;
+	if (v->k <= Kmax) {	/* recycle: push onto the list for its size class */
+		ACQUIRE_DTOA_LOCK(0);
+		v->next = freelist[v->k];
+		freelist[v->k] = v;
+		FREE_DTOA_LOCK(0);
+		return;
+		}
+#ifdef FREE
+	FREE((void*)v);
+#else
+	free((void*)v);
+#endif
+	}
+
+ int
+lo0bits
+#ifdef KR_headers
+	(y) ULong *y;
+#else
+	(ULong *y)
+#endif
+{	/* Return the number of trailing zero bits in *y (32 if *y is 0) and, when that count is 1..31, shift *y right by it. */
+	int k;
+	ULong x = *y;
+
+	if (x & 7) {	/* fast path: the lowest set bit is among the low three bits */
+		if (x & 1)
+			return 0;	/* *y is left untouched */
+		if (x & 2) {
+			*y = x >> 1;
+			return 1;
+			}
+		*y = x >> 2;
+		return 2;
+		}
+	k = 0;
+	if (!(x & 0xffff)) {	/* binary search over halves, bytes, nibbles, ... */
+		k = 16;
+		x >>= 16;
+		}
+	if (!(x & 0xff)) {
+		k += 8;
+		x >>= 8;
+		}
+	if (!(x & 0xf)) {
+		k += 4;
+		x >>= 4;
+		}
+	if (!(x & 0x3)) {
+		k += 2;
+		x >>= 2;
+		}
+	if (!(x & 1)) {
+		k++;
+		x >>= 1;
+		if (!x)
+			return 32;	/* input was zero; *y is not written */
+		}
+	*y = x;
+	return k;
+	}
+
+ Bigint *
+multadd
+#ifdef KR_headers
+	(b, m, a) Bigint *b; int m, a;
+#else
+	(Bigint *b, int m, int a)	/* multiply by m and add a */
+#endif
+{	/* Computes b = b*m + a in place when it fits; otherwise the result is moved to a larger Bigint and the old b is freed. Callers must use the returned pointer. */
+	int i, wds;
+#ifdef ULLong
+	ULong *x;
+	ULLong carry, y;
+#else
+	ULong carry, *x, y;
+#ifdef Pack_32
+	ULong xi, z;
+#endif
+#endif
+	Bigint *b1;
+
+	wds = b->wds;
+	x = b->x;
+	i = 0;
+	carry = a;	/* the addend enters as the initial carry */
+	do {
+#ifdef ULLong
+		y = *x * (ULLong)m + carry;
+		carry = y >> 32;
+		*x++ = (ULong)(y & 0xffffffffUL);
+#else
+#ifdef Pack_32
+		xi = *x;
+		y = (xi & 0xffff) * m + carry;	/* without a 64-bit type, multiply the 16-bit halves separately */
+		z = (xi >> 16) * m + (y >> 16);
+		carry = z >> 16;
+		*x++ = (z << 16) + (y & 0xffff);
+#else
+		y = *x * m + carry;
+		carry = y >> 16;
+		*x++ = y & 0xffff;
+#endif
+#endif
+		}
+		while(++i < wds);	/* do-while: at least one word is processed even if wds is 0 */
+	if (carry) {	/* result needs one more word */
+		if (wds >= b->maxwds) {
+			b1 = Balloc(b->k+1);
+			Bcopy(b1, b);
+			Bfree(b);
+			b = b1;
+			}
+		b->x[wds++] = (ULong)carry;
+		b->wds = wds;
+		}
+	return b;
+	}
+
+ int
+hi0bits_D2A
+#ifdef KR_headers
+	(x) ULong x;
+#else
+	(ULong x)
+#endif
+{	/* Number of leading zero bits in the 32-bit value x; 32 when x is 0. */
+	int zeros = 0;
+
+	/* Binary search: whenever the top part of the remaining window is empty, */
+	/* count it and slide the lower part up into its place. */
+	if ((x & 0xffff0000) == 0) {
+		x <<= 16;
+		zeros = 16;
+		}
+	if ((x & 0xff000000) == 0) {
+		x <<= 8;
+		zeros += 8;
+		}
+	if ((x & 0xf0000000) == 0) {
+		x <<= 4;
+		zeros += 4;
+		}
+	if ((x & 0xc0000000) == 0) {
+		x <<= 2;
+		zeros += 2;
+		}
+	if (x & 0x80000000)
+		return zeros;
+	/* Bit 31 is clear: either bit 30 is the leading one or x was zero. */
+	return (x & 0x40000000) ? zeros + 1 : 32;
+	}
+
+ Bigint *
+i2b
+#ifdef KR_headers
+	(i) int i;
+#else
+	(int i)
+#endif
+{	/* Return a freshly allocated one-word Bigint whose value is i. */
+	Bigint *one_word;
+
+	one_word = Balloc(1);	/* size class 1: room for two words */
+	one_word->wds = 1;
+	one_word->x[0] = i;
+	return one_word;
+	}
+
+ Bigint *
+mult
+#ifdef KR_headers
+	(a, b) Bigint *a, *b;
+#else
+	(Bigint *a, Bigint *b)
+#endif
+{	/* Return a newly allocated Bigint holding a*b; neither operand is modified or freed. */
+	Bigint *c;
+	int k, wa, wb, wc;
+	ULong *x, *xa, *xae, *xb, *xbe, *xc, *xc0;
+	ULong y;
+#ifdef ULLong
+	ULLong carry, z;
+#else
+	ULong carry, z;
+#ifdef Pack_32
+	ULong z2;
+#endif
+#endif
+
+	if (a->wds < b->wds) {	/* make a the operand with more words */
+		c = a;
+		a = b;
+		b = c;
+		}
+	k = a->k;
+	wa = a->wds;
+	wb = b->wds;
+	wc = wa + wb;	/* upper bound on the product's length */
+	if (wc > a->maxwds)
+		k++;
+	c = Balloc(k);
+	for(x = c->x, xa = x + wc; x < xa; x++)	/* clear the accumulator */
+		*x = 0;
+	xa = a->x;
+	xae = xa + wa;
+	xb = b->x;
+	xbe = xb + wb;
+	xc0 = c->x;
+#ifdef ULLong
+	for(; xb < xbe; xc0++) {	/* schoolbook multiply: one row per word of b, skipping zero words */
+		if ( (y = *xb++) !=0) {
+			x = xa;
+			xc = xc0;
+			carry = 0;
+			do {
+				z = *x++ * (ULLong)y + *xc + carry;
+				carry = z >> 32;
+				*xc++ = (ULong)(z & 0xffffffffUL);
+				}
+				while(x < xae);
+			*xc = (ULong)carry;
+			}
+		}
+#else
+#ifdef Pack_32
+	for(; xb < xbe; xb++, xc0++) {	/* no 64-bit type: handle each 16-bit half of *xb as its own row */
+		if ( (y = *xb & 0xffff) !=0) {
+			x = xa;
+			xc = xc0;
+			carry = 0;
+			do {
+				z = (*x & 0xffff) * y + (*xc & 0xffff) + carry;
+				carry = z >> 16;
+				z2 = (*x++ >> 16) * y + (*xc >> 16) + carry;
+				carry = z2 >> 16;
+				Storeinc(xc, z2, z);
+				}
+				while(x < xae);
+			*xc = carry;
+			}
+		if ( (y = *xb >> 16) !=0) {
+			x = xa;
+			xc = xc0;
+			carry = 0;
+			z2 = *xc;
+			do {
+				z = (*x & 0xffff) * y + (*xc >> 16) + carry;
+				carry = z >> 16;
+				Storeinc(xc, z, z2);
+				z2 = (*x++ >> 16) * y + (*xc & 0xffff) + carry;
+				carry = z2 >> 16;
+				}
+				while(x < xae);
+			*xc = z2;
+			}
+		}
+#else
+	for(; xb < xbe; xc0++) {	/* 16 bits per word: the product of two words fits in a ULong */
+		if ( (y = *xb++) !=0) {
+			x = xa;
+			xc = xc0;
+			carry = 0;
+			do {
+				z = *x++ * y + *xc + carry;
+				carry = z >> 16;
+				*xc++ = z & 0xffff;
+				}
+				while(x < xae);
+			*xc = carry;
+			}
+		}
+#endif
+#endif
+	for(xc0 = c->x, xc = xc0 + wc; wc > 0 && !*--xc; --wc) ;	/* drop leading zero words */
+	c->wds = wc;
+	return c;
+	}
+
+ static Bigint *p5s;	/* head of a cached chain 5^4, 5^8, 5^16, ... linked through ->next; built lazily */
+
+ Bigint *
+pow5mult
+#ifdef KR_headers
+	(b, k) Bigint *b; int k;
+#else
+	(Bigint *b, int k)
+#endif
+{	/* Return b * 5^k. The input b is consumed (updated in place or freed); callers must use the returned pointer. */
+	Bigint *b1, *p5, *p51;
+	int i;
+	static int p05[3] = { 5, 25, 125 };
+
+	if ( (i = k & 3) !=0)	/* handle k mod 4 with a single small multiply */
+		b = multadd(b, p05[i-1], 0);
+
+	if (!(k >>= 2))
+		return b;
+	if ((p5 = p5s) == 0) {
+		/* first time */
+#ifdef MULTIPLE_THREADS
+		ACQUIRE_DTOA_LOCK(1);
+		if (!(p5 = p5s)) {
+			p5 = p5s = i2b(625);
+			p5->next = 0;
+			}
+		FREE_DTOA_LOCK(1);
+#else
+		p5 = p5s = i2b(625);	/* 625 = 5^4 */
+		p5->next = 0;
+#endif
+		}
+	for(;;) {	/* square-and-multiply over the remaining bits of k */
+		if (k & 1) {
+			b1 = mult(b, p5);
+			Bfree(b);
+			b = b1;
+			}
+		if (!(k >>= 1))
+			break;
+		if ((p51 = p5->next) == 0) {	/* next power not cached yet: square the current one and keep it */
+#ifdef MULTIPLE_THREADS
+			ACQUIRE_DTOA_LOCK(1);
+			if (!(p51 = p5->next)) {
+				p51 = p5->next = mult(p5,p5);
+				p51->next = 0;
+				}
+			FREE_DTOA_LOCK(1);
+#else
+			p51 = p5->next = mult(p5,p5);
+			p51->next = 0;
+#endif
+			}
+		p5 = p51;
+		}
+	return b;
+	}
+
+ Bigint *
+lshift
+#ifdef KR_headers
+	(b, k) Bigint *b; int k;
+#else
+	(Bigint *b, int k)
+#endif
+{	/* Return a newly allocated Bigint equal to b << k; the input b is freed. */
+	int i, k1, n, n1;
+	Bigint *b1;
+	ULong *x, *x1, *xe, z;
+
+	n = k >> kshift;	/* whole words of shift */
+	k1 = b->k;
+	n1 = n + b->wds + 1;	/* worst-case length: one extra word for bits carried out of the top */
+	for(i = b->maxwds; n1 > i; i <<= 1)	/* find a size class large enough */
+		k1++;
+	b1 = Balloc(k1);
+	x1 = b1->x;
+	for(i = 0; i < n; i++)	/* low n words of the result are zero */
+		*x1++ = 0;
+	x = b->x;
+	xe = x + b->wds;
+	if (k &= kmask) {	/* remaining sub-word shift */
+#ifdef Pack_32
+		k1 = 32 - k;
+		z = 0;
+		do {
+			*x1++ = *x << k | z;
+			z = *x++ >> k1;	/* bits carried into the next word */
+			}
+			while(x < xe);
+		if ((*x1 = z) !=0)	/* the extra top word counts only if it is nonzero */
+			++n1;
+#else
+		k1 = 16 - k;
+		z = 0;
+		do {
+			*x1++ = *x << k  & 0xffff | z;
+			z = *x++ >> k1;
+			}
+			while(x < xe);
+		if (*x1 = z)
+			++n1;
+#endif
+		}
+	else do
+		*x1++ = *x++;
+		while(x < xe);
+	b1->wds = n1 - 1;
+	Bfree(b);
+	return b1;
+	}
+
+ int
+cmp
+#ifdef KR_headers
+	(a, b) Bigint *a, *b;
+#else
+	(Bigint *a, Bigint *b)
+#endif
+{	/* Three-way magnitude comparison: negative, zero or positive as a is less than, equal to or greater than b. */
+	ULong *pa, *pb, *base;
+	int na, nb;
+
+	na = a->wds;
+	nb = b->wds;
+#ifdef DEBUG
+	if (na > 1 && !a->x[na-1])
+		Bug("cmp called with a->x[a->wds-1] == 0");
+	if (nb > 1 && !b->x[nb-1])
+		Bug("cmp called with b->x[b->wds-1] == 0");
+#endif
+	/* With no leading zero words, the operand with more words is the larger one. */
+	if (na != nb)
+		return na - nb;
+	base = a->x;
+	pa = base + nb;
+	pb = b->x + nb;
+	/* Same length: compare from the most significant word downward. */
+	do {
+		--pa, --pb;
+		if (*pa != *pb)
+			return *pa < *pb ? -1 : 1;
+		} while (pa > base);
+	return 0;
+	}
+
+ Bigint *
+diff
+#ifdef KR_headers
+	(a, b) Bigint *a, *b;
+#else
+	(Bigint *a, Bigint *b)
+#endif
+{	/* Return a newly allocated Bigint holding |a - b|, with sign = 1 when a < b and 0 otherwise; a and b are not modified. */
+	Bigint *c;
+	int i, wa, wb;
+	ULong *xa, *xae, *xb, *xbe, *xc;
+#ifdef ULLong
+	ULLong borrow, y;
+#else
+	ULong borrow, y;
+#ifdef Pack_32
+	ULong z;
+#endif
+#endif
+
+	i = cmp(a,b);
+	if (!i) {	/* equal operands: return a one-word zero */
+		c = Balloc(0);
+		c->wds = 1;
+		c->x[0] = 0;
+		return c;
+		}
+	if (i < 0) {	/* swap so that a is the larger operand, and remember the sign */
+		c = a;
+		a = b;
+		b = c;
+		i = 1;
+		}
+	else
+		i = 0;
+	c = Balloc(a->k);
+	c->sign = i;
+	wa = a->wds;
+	xa = a->x;
+	xae = xa + wa;
+	wb = b->wds;
+	xb = b->x;
+	xbe = xb + wb;
+	xc = c->x;
+	borrow = 0;
+#ifdef ULLong
+	do {	/* subtract word by word over the length of b */
+		y = (ULLong)*xa++ - *xb++ - borrow;
+		borrow = y >> 32 & 1UL;	/* bit 32 of the wide result is set exactly when the subtraction wrapped */
+		*xc++ = (ULong)(y & 0xffffffffUL);
+		}
+		while(xb < xbe);
+	while(xa < xae) {	/* propagate the borrow through the remaining words of a */
+		y = *xa++ - borrow;
+		borrow = y >> 32 & 1UL;
+		*xc++ = (ULong)(y & 0xffffffffUL);
+		}
+#else
+#ifdef Pack_32
+	do {	/* no 64-bit type: subtract the 16-bit halves separately */
+		y = (*xa & 0xffff) - (*xb & 0xffff) - borrow;
+		borrow = (y & 0x10000) >> 16;
+		z = (*xa++ >> 16) - (*xb++ >> 16) - borrow;
+		borrow = (z & 0x10000) >> 16;
+		Storeinc(xc, z, y);
+		}
+		while(xb < xbe);
+	while(xa < xae) {
+		y = (*xa & 0xffff) - borrow;
+		borrow = (y & 0x10000) >> 16;
+		z = (*xa++ >> 16) - borrow;
+		borrow = (z & 0x10000) >> 16;
+		Storeinc(xc, z, y);
+		}
+#else
+	do {
+		y = *xa++ - *xb++ - borrow;
+		borrow = (y & 0x10000) >> 16;
+		*xc++ = y & 0xffff;
+		}
+		while(xb < xbe);
+	while(xa < xae) {
+		y = *xa++ - borrow;
+		borrow = (y & 0x10000) >> 16;
+		*xc++ = y & 0xffff;
+		}
+#endif
+#endif
+	while(!*--xc)	/* drop leading zero words; the result is nonzero here because the operands differ */
+		wa--;
+	c->wds = wa;
+	return c;
+	}
+
+ double
+b2d
+#ifdef KR_headers
+	(a, e) Bigint *a; int *e;
+#else
+	(Bigint *a, int *e)
+#endif
+{
+	ULong *xa, *xa0, w, y, z;
+	int k;
+	U d;
+#ifdef VAX
+	ULong d0, d1;
+#else
+#define d0 word0(&d)	/* d0/d1 alias the two words of d directly when not VAX */
+#define d1 word1(&d)
+#endif
+	/* Build a double from the top words of a (Exp_1 ORed into the high word); *e receives 32 - hi0bits(top word). */
+	xa0 = a->x;
+	xa = xa0 + a->wds;
+	y = *--xa;	/* highest-index word of a */
+#ifdef DEBUG
+	if (!y) Bug("zero y in b2d");
+#endif
+	k = hi0bits(y);	/* presumably the number of leading zero bits in y -- confirm against hi0bits */
+	*e = 32 - k;
+#ifdef Pack_32
+	if (k < Ebits) {	/* top word alone overfills the high word: shift right */
+		d0 = Exp_1 | y >> (Ebits - k);
+		w = xa > xa0 ? *--xa : 0;	/* next lower word, or 0 when a has no more words */
+		d1 = y << ((32-Ebits) + k) | w >> (Ebits - k);
+		goto ret_d;
+		}
+	z = xa > xa0 ? *--xa : 0;
+	if (k -= Ebits) {	/* top word needs a left shift; pull bits up from lower words */
+		d0 = Exp_1 | y << k | z >> (32 - k);
+		y = xa > xa0 ? *--xa : 0;
+		d1 = z << k | y >> (32 - k);
+		}
+	else {
+		d0 = Exp_1 | y;
+		d1 = z;
+		}
+#else
+	if (k < Ebits + 16) {
+		z = xa > xa0 ? *--xa : 0;
+		d0 = Exp_1 | y << k - Ebits | z >> Ebits + 16 - k;
+		w = xa > xa0 ? *--xa : 0;
+		y = xa > xa0 ? *--xa : 0;
+		d1 = z << k + 16 - Ebits | w << k - Ebits | y >> 16 + Ebits - k;
+		goto ret_d;
+		}
+	z = xa > xa0 ? *--xa : 0;
+	w = xa > xa0 ? *--xa : 0;
+	k -= Ebits + 16;
+	d0 = Exp_1 | y << k + 16 | z << k | w >> 16 - k;
+	y = xa > xa0 ? *--xa : 0;
+	d1 = w << k + 16 | y << k;
+#endif
+ ret_d:
+#ifdef VAX
+	word0(&d) = d0 >> 16 | d0 << 16;	/* VAX: store each word with its 16-bit halves exchanged */
+	word1(&d) = d1 >> 16 | d1 << 16;
+#endif
+	return dval(&d);
+	}
+#undef d0
+#undef d1
+
+ Bigint *
+d2b
+#ifdef KR_headers
+	(dd, e, bits) double dd; int *e, *bits;
+#else
+	(double dd, int *e, int *bits)
+#endif
+{
+	Bigint *b;
+	U d;
+#ifndef Sudden_Underflow
+	int i;
+#endif
+	int de, k;
+	ULong *x, y, z;
+#ifdef VAX
+	ULong d0, d1;
+#else
+#define d0 word0(&d)	/* d0/d1 alias the two words of d directly when not VAX */
+#define d1 word1(&d)
+#endif
+	d.d = dd;
+#ifdef VAX
+	d0 = word0(&d) >> 16 | word0(&d) << 16;
+	d1 = word1(&d) >> 16 | word1(&d) << 16;
+#endif
+	/* Split dd into a Bigint with its low zero bits shifted out; *e and *bits are set from the exponent field and k below. */
+#ifdef Pack_32
+	b = Balloc(1);
+#else
+	b = Balloc(2);
+#endif
+	x = b->x;
+
+	z = d0 & Frac_mask;	/* fraction bits held in the high word */
+	d0 &= 0x7fffffff;	/* clear sign bit, which we ignore */
+#ifdef Sudden_Underflow
+	de = (int)(d0 >> Exp_shift);
+#ifndef IBM
+	z |= Exp_msk11;
+#endif
+#else
+	if ( (de = (int)(d0 >> Exp_shift)) !=0)	/* de is the raw exponent field; Bias is subtracted below */
+		z |= Exp_msk1;	/* nonzero exponent field: OR in Exp_msk1 (presumably the implicit leading bit -- confirm) */
+#endif
+#ifdef Pack_32
+	if ( (y = d1) !=0) {	/* low word nonzero: lo0bits(&y) supplies k and may modify y */
+		if ( (k = lo0bits(&y)) !=0) {
+			x[0] = y | z << (32 - k);
+			z >>= k;
+			}
+		else
+			x[0] = y;
+#ifndef Sudden_Underflow
+		i =
+#endif
+		     b->wds = (x[1] = z) !=0 ? 2 : 1;
+		}
+	else {	/* low word is zero: only z contributes, and k gains 32 */
+		k = lo0bits(&z);
+		x[0] = z;
+#ifndef Sudden_Underflow
+		i =
+#endif
+		    b->wds = 1;
+		k += 32;
+		}
+#else
+	if ( (y = d1) !=0) {
+		if ( (k = lo0bits(&y)) !=0)
+			if (k >= 16) {
+				x[0] = y | z << 32 - k & 0xffff;
+				x[1] = z >> k - 16 & 0xffff;
+				x[2] = z >> k;
+				i = 2;
+				}
+			else {
+				x[0] = y & 0xffff;
+				x[1] = y >> 16 | z << 16 - k & 0xffff;
+				x[2] = z >> k & 0xffff;
+				x[3] = z >> k+16;
+				i = 3;
+				}
+		else {
+			x[0] = y & 0xffff;
+			x[1] = y >> 16;
+			x[2] = z & 0xffff;
+			x[3] = z >> 16;
+			i = 3;
+			}
+		}
+	else {
+#ifdef DEBUG
+		if (!z)
+			Bug("Zero passed to d2b");
+#endif
+		k = lo0bits(&z);
+		if (k >= 16) {
+			x[0] = z;
+			i = 0;
+			}
+		else {
+			x[0] = z & 0xffff;
+			x[1] = z >> 16;
+			i = 1;
+			}
+		k += 32;
+		}
+	while(!x[i])	/* step i down past zero words at the top */
+		--i;
+	b->wds = i + 1;
+#endif
+#ifndef Sudden_Underflow
+	if (de) {	/* nonzero exponent field */
+#endif
+#ifdef IBM
+		*e = (de - Bias - (P-1) << 2) + k;
+		*bits = 4*P + 8 - k - hi0bits(word0(&d) & Frac_mask);
+#else
+		*e = de - Bias - (P-1) + k;
+		*bits = P - k;
+#endif
+#ifndef Sudden_Underflow
+		}
+	else {	/* zero exponent field: bit count comes from the stored words instead of P */
+		*e = de - Bias - (P-1) + 1 + k;
+#ifdef Pack_32
+		*bits = 32*i - hi0bits(x[i-1]);
+#else
+		*bits = (i+2)*16 - hi0bits(x[i]);
+#endif
+		}
+#endif
+	return b;
+	}
+#undef d0
+#undef d1
+
+ CONST double	/* bigtens[i] == 10^(16 * 2^i); tinytens[i] is the matching negative power */
+#ifdef IEEE_Arith
+bigtens[] = { 1e16, 1e32, 1e64, 1e128, 1e256 };
+CONST double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128, 1e-256
+		};
+#else
+#ifdef IBM
+bigtens[] = { 1e16, 1e32, 1e64 };	/* shorter tables for the IBM configuration */
+CONST double tinytens[] = { 1e-16, 1e-32, 1e-64 };
+#else
+bigtens[] = { 1e16, 1e32 };	/* shortest tables when neither IEEE_Arith nor IBM is defined */
+CONST double tinytens[] = { 1e-16, 1e-32 };
+#endif
+#endif
+
+ CONST double	/* tens[i] == 10^i for i = 0..22 (0..24 under VAX) */
+tens[] = {
+		1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
+		1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+		1e20, 1e21, 1e22
+#ifdef VAX
+		, 1e23, 1e24
+#endif
+		};
+
+ char *	/* strcp_D2A: copy string b into a and return a pointer to the NUL written in a */
+#ifdef KR_headers
+strcp_D2A(a, b) char *a; char *b;
+#else
+strcp_D2A(char *a, CONST char *b)
+#endif
+{
+	while((*a = *b++))	/* copies each byte; the loop ends after the terminating NUL is stored */
+		a++;
+	return a;	/* a stops on the NUL, so callers can append from here */
+	}
+
+#ifdef NO_STRING_H
+	/* Fallback byte copy compiled only when NO_STRING_H is defined. */
+ Char *	/* returns the start of the destination */
+#ifdef KR_headers
+memcpy_D2A(a1, b1, len) Char *a1; Char *b1; size_t len;	/* names must be a1/b1: the body reads them and declares its own a, b */
+#else
+memcpy_D2A(void *a1, void *b1, size_t len)
+#endif
+{
+	char *a = (char*)a1, *ae = a + len;	/* ae marks one past the last destination byte */
+	char *b = (char*)b1, *a0 = a;	/* a0 remembers the destination start for the return value */
+	while(a < ae)	/* copy len bytes in ascending order */
+		*a++ = *b++;
+	return a0;
+	}
+
+#endif /* NO_STRING_H */
diff --git a/libraries/gdtoa/qnan.c b/libraries/gdtoa/qnan.c
new file mode 100644
index 000000000..ea7e8745b
--- /dev/null
+++ b/libraries/gdtoa/qnan.c
@@ -0,0 +1,119 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 2005 by David M. Gay
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that the copyright notice and this permission notice and warranty
+disclaimer appear in supporting documentation, and that the name of
+the author or any of his current or former employers not be used in
+advertising or publicity pertaining to distribution of the software
+without specific, written prior permission.
+
+THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.  IN
+NO EVENT SHALL THE AUTHOR OR ANY OF HIS CURRENT OR FORMER EMPLOYERS BE
+LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+/* Program to compute quiet NaNs of various precisions (float,	*/
+/* double, and perhaps long double) on the current system,	*/
+/* provided the system uses binary IEEE (P754) arithmetic.	*/
+/* Note that one system's quiet NaN may be a signaling NaN on	*/
+/* another system.  The IEEE arithmetic standards (P754, P854)	*/
+/* do not specify how to distinguish signaling NaNs from quiet	*/
+/* ones, and this detail varies across systems.	 The computed	*/
+/* NaN values are encoded in #defines for values for an		*/
+/* unsigned 32-bit integer type, called Ulong below, and	*/
+/* (for long double) perhaps as unsigned short values.  Once	*/
+/* upon a time, there were PC compilers for Intel CPUs that	*/
+/* had sizeof(long double) = 10.  Are such compilers still	*/
+/* distributed?							*/
+
+#include <stdio.h>
+#include "arith.h"
+
+#ifndef Long
+#define Long long	/* default when arith.h (included above) did not define Long */
+#endif
+
+typedef unsigned Long Ulong;
+
+#undef HAVE_IEEE
+#ifdef IEEE_8087
+#define _0 1	/* index in L[] that main loads with the 0x7ff00000 word of a double */
+#define _1 0	/* index in L[] of the double's other word */
+#define _3 3	/* index in L[] whose top bit main clears for the long double result */
+#if defined(Gen_ld_QNAN) && !defined(NO_LONG_LONG)
+static int perm[4] = { 0, 1, 2, 3 };	/* order in which L[] words are printed as ld_QNAN0..3 */
+#endif
+#define HAVE_IEEE
+#endif
+#ifdef IEEE_MC68k
+#define _0 0	/* same roles as above, with the word order reversed */
+#define _1 1
+#define _3 0
+#if defined(Gen_ld_QNAN) && !defined(NO_LONG_LONG)
+static int perm[4] = { 3, 2, 1, 0 };
+#endif
+#define HAVE_IEEE
+#endif
+
+#define UL (unsigned long)	/* cast prefix so printed values match the %lx conversions in main */
+
+ int
+main(void)
+{
+#ifdef HAVE_IEEE
+	typedef union {	/* overlays float, double and long double with 32-bit words */
+		float f;
+		double d;
+		Ulong L[4];
+#ifndef NO_LONG_LONG
+		unsigned short u[5];
+		long double D;
+#endif
+		} U;
+	U a, b, c;
+#if defined(Gen_ld_QNAN) && !defined(NO_LONG_LONG)
+	int i;
+#endif
+	/* Each NaN is produced by subtracting two identical infinity bit patterns; results are printed as #define lines. */
+	a.L[0] = b.L[0] = 0x7f800000;	/* IEEE single-precision +Inf bit pattern */
+	c.f = a.f - b.f;
+	printf("#define f_QNAN 0x%lx\n", UL (c.L[0] & 0x7fffffff));	/* top (sign) bit masked off */
+	a.L[_0] = b.L[_0] = 0x7ff00000;	/* IEEE double-precision +Inf: high word, then zero low word */
+	a.L[_1] = b.L[_1] = 0;
+	c.d = a.d - b.d;	/* quiet NaN */
+	c.L[_0] &= 0x7fffffff;
+	printf("#define d_QNAN0 0x%lx\n", UL c.L[_0]);
+	printf("#define d_QNAN1 0x%lx\n", UL c.L[_1]);
+#ifndef NO_LONG_LONG
+#ifdef Gen_ld_QNAN
+	if (sizeof(a.D) >= 16) {	/* only when long double occupies at least 16 bytes */
+		b.D = c.D = a.d;
+		if (printf("") < 0)
+			c.D = 37;	/* never executed; just defeat optimization */
+		a.L[0] = a.L[1] = a.L[2] = a.L[3] = 0;
+		a.D = b.D - c.D;
+		a.L[_3] &= 0x7fffffff;
+		for(i = 0; i < 4; i++)	/* emit the four words in perm[] order */
+			printf("#define ld_QNAN%d 0x%lx\n", i, UL a.L[perm[i]]);
+		}
+#endif
+#endif
+#endif /* HAVE_IEEE */
+	return 0;
+	}
diff --git a/libraries/gdtoa/qnan.obj b/libraries/gdtoa/qnan.obj
new file mode 100644
index 000000000..994af47d9
Binary files /dev/null and b/libraries/gdtoa/qnan.obj differ
diff --git a/libraries/gdtoa/smisc.c b/libraries/gdtoa/smisc.c
new file mode 100644
index 000000000..f4dbafb21
--- /dev/null
+++ b/libraries/gdtoa/smisc.c
@@ -0,0 +1,191 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 1999 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ Bigint *
+s2b
+#ifdef KR_headers
+	(s, nd0, nd, y9, dplen) CONST char *s; int dplen, nd0, nd; ULong y9;
+#else
+	(CONST char *s, int nd0, int nd, ULong y9, int dplen)
+#endif
+{
+	Bigint *b;
+	int i, k;
+	Long x, y;
+	/* Build a Bigint from digit string s: start from y9, then fold in digits 9..nd-1 with multadd(b, 10, digit). */
+	x = (nd + 8) / 9;	/* ceil(nd / 9) */
+	for(k = 0, y = 1; x > y; y <<= 1, k++) ;	/* smallest k with 2^k >= x */
+#ifdef Pack_32
+	b = Balloc(k);
+	b->x[0] = y9;	/* presumably the value of the first nine digits -- confirm against callers */
+	b->wds = 1;
+#else
+	b = Balloc(k+1);
+	b->x[0] = y9 & 0xffff;
+	b->wds = (b->x[1] = y9 >> 16) ? 2 : 1;
+#endif
+	/* Digits 0..8 are skipped; the rest are read in two runs separated by dplen characters. */
+	i = 9;
+	if (9 < nd0) {
+		s += 9;
+		do b = multadd(b, 10, *s++ - '0');
+			while(++i < nd0);
+		s += dplen;	/* skip dplen characters after the first nd0 digits */
+		}
+	else
+		s += dplen + 9;
+	for(; i < nd; i++)	/* remaining digits up to nd */
+		b = multadd(b, 10, *s++ - '0');
+	return b;
+	}
+
+ double
+ratio
+#ifdef KR_headers
+	(a, b) Bigint *a, *b;
+#else
+	(Bigint *a, Bigint *b)
+#endif
+{
+	U da, db;
+	int k, ka, kb;
+	/* Return dval(da)/dval(db), where da, db come from b2d(a), b2d(b) and one of them has its exponent field raised by k. */
+	dval(&da) = b2d(a, &ka);
+	dval(&db) = b2d(b, &kb);
+	k = ka - kb + ULbits*(a->wds - b->wds);	/* combines b2d's *e outputs with the word-count difference */
+#ifdef IBM
+	if (k > 0) {
+		word0(&da) += (k >> 2)*Exp_msk1;	/* IBM: exponent field steps by k/4; remainder applied by multiplying */
+		if (k &= 3)
+			dval(&da) *= 1 << k;
+		}
+	else {
+		k = -k;
+		word0(&db) += (k >> 2)*Exp_msk1;
+		if (k &= 3)
+			dval(&db) *= 1 << k;
+		}
+#else
+	if (k > 0)
+		word0(&da) += k*Exp_msk1;	/* positive k is added to the numerator's high word */
+	else {
+		k = -k;
+		word0(&db) += k*Exp_msk1;	/* otherwise |k| is added to the denominator's high word */
+		}
+#endif
+	return dval(&da) / dval(&db);
+	}
+
+#ifdef INFNAN_CHECK
+	/* match: test whether the characters following **sp equal t, folding 'A'..'Z' in the input to lower case. */
+ int
+match
+#ifdef KR_headers
+	(sp, t) char **sp, *t;
+#else
+	(CONST char **sp, char *t)
+#endif
+{
+	int c, d;
+	CONST char *s = *sp;
+	/* Returns 1 and advances *sp past the matched text, or 0 leaving *sp unchanged. */
+	while( (d = *t++) !=0) {
+		if ((c = *++s) >= 'A' && c <= 'Z')	/* pre-increment: comparison starts at the character after *sp */
+			c += 'a' - 'A';
+		if (c != d)	/* t is compared as-is, so it must already be lower case to match letters */
+			return 0;
+		}
+	*sp = s + 1;
+	return 1;
+	}
+#endif /* INFNAN_CHECK */
+
+ void
+#ifdef KR_headers
+copybits(c, n, b) ULong *c; int n; Bigint *b;
+#else
+copybits(ULong *c, int n, Bigint *b)
+#endif
+{
+	ULong *ce, *x, *xe;
+#ifdef Pack_16
+	int nw, nw1;
+#endif
+	/* Copy b's words into c, then zero-fill c up to the end position ce computed from n. */
+	ce = c + ((n-1) >> kshift) + 1;	/* one past the last destination word written */
+	x = b->x;
+#ifdef Pack_32
+	xe = x + b->wds;
+	while(x < xe)
+		*c++ = *x++;
+#else
+	nw = b->wds;
+	nw1 = nw & 1;	/* 1 when b has an odd number of words */
+	for(xe = x + (nw - nw1); x < xe; x += 2)	/* two source words per Storeinc call */
+		Storeinc(c, x[1], x[0]);
+	if (nw1)
+		*c++ = *x;
+#endif
+	while(c < ce)	/* clear whatever destination words remain */
+		*c++ = 0;
+	}
+
+ ULong
+#ifdef KR_headers
+any_on(b, k) Bigint *b; int k;
+#else
+any_on(Bigint *b, int k)
+#endif
+{
+	int n, nwds;
+	ULong *x, *x0, x1, x2;
+	/* Return 1 if any of the low k bits of b is set, else 0 (assumes kshift/kmask split k into word index and bit offset). */
+	x = b->x;
+	nwds = b->wds;
+	n = k >> kshift;	/* number of whole words covered by k */
+	if (n > nwds)
+		n = nwds;	/* clamp to the words b actually has */
+	else if (n < nwds && (k &= kmask)) {
+		x1 = x2 = x[n];
+		x1 >>= k;	/* shifting down and back up clears the low k bits of word n */
+		x1 <<= k;
+		if (x1 != x2)	/* a difference means one of those low bits was set */
+			return 1;
+		}
+	x0 = x;
+	x += n;
+	while(x > x0)	/* scan the n whole words below */
+		if (*--x)
+			return 1;
+	return 0;
+	}
diff --git a/libraries/gdtoa/strtoIQ.c b/libraries/gdtoa/strtoIQ.c
new file mode 100644
index 000000000..9ce5120e6
--- /dev/null
+++ b/libraries/gdtoa/strtoIQ.c
@@ -0,0 +1,63 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ int
+#ifdef KR_headers
+strtoIQ(s, sp, a, b) CONST char *s; char **sp; void *a; void *b;
+#else
+strtoIQ(CONST char *s, char **sp, void *a, void *b)
+#endif
+{
+	static FPI fpi = { 113, 1-16383-113+1, 32766-16383-113+1, 1, SI };
+	Long exp[2];
+	Bigint *B[2];
+	int k, rv[2];
+	ULong *L = (ULong *)a, *M = (ULong *)b;
+	/* Run strtoIg with a 113-bit FPI and convert its one or two results into a and b via ULtoQ. */
+	B[0] = Balloc(2);
+	B[0]->wds = 4;
+	k = strtoIg(s, sp, &fpi, exp, B, rv);
+	ULtoQ(L, B[0]->x, exp[0], rv[0]);
+	Bfree(B[0]);
+	if (B[1]) {	/* strtoIg produced a second value */
+		ULtoQ(M, B[1]->x, exp[1], rv[1]);
+		Bfree(B[1]);
+		}
+	else {	/* no second value: b is a copy of a's four words */
+		M[0] = L[0];
+		M[1] = L[1];
+		M[2] = L[2];
+		M[3] = L[3];
+		}
+	return k;	/* strtoIg's return value is passed through */
+	}
diff --git a/libraries/gdtoa/strtoId.c b/libraries/gdtoa/strtoId.c
new file mode 100644
index 000000000..1c97d382d
--- /dev/null
+++ b/libraries/gdtoa/strtoId.c
@@ -0,0 +1,60 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ int
+#ifdef KR_headers
+strtoId(s, sp, f0, f1) CONST char *s; char **sp; double *f0, *f1;
+#else
+strtoId(CONST char *s, char **sp, double *f0, double *f1)
+#endif
+{
+	static FPI fpi = { 53, 1-1023-53+1, 2046-1023-53+1, 1, SI };
+	Long exp[2];
+	Bigint *B[2];
+	int k, rv[2];
+	/* Run strtoIg with a 53-bit FPI and convert its one or two results into *f0 and *f1 via ULtod. */
+	B[0] = Balloc(1);
+	B[0]->wds = 2;
+	k = strtoIg(s, sp, &fpi, exp, B, rv);
+	ULtod((ULong*)f0, B[0]->x, exp[0], rv[0]);
+	Bfree(B[0]);
+	if (B[1]) {	/* strtoIg produced a second value */
+		ULtod((ULong*)f1, B[1]->x, exp[1], rv[1]);
+		Bfree(B[1]);
+		}
+	else {	/* no second value: *f1 is a word-for-word copy of *f0 */
+		((ULong*)f1)[0] = ((ULong*)f0)[0];
+		((ULong*)f1)[1] = ((ULong*)f0)[1];
+		}
+	return k;	/* strtoIg's return value is passed through */
+	}
diff --git a/libraries/gdtoa/strtoIdd.c b/libraries/gdtoa/strtoIdd.c
new file mode 100644
index 000000000..40b7936bc
--- /dev/null
+++ b/libraries/gdtoa/strtoIdd.c
@@ -0,0 +1,66 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ int
+#ifdef KR_headers
+strtoIdd(s, sp, f0, f1) CONST char *s; char **sp; double *f0, *f1;
+#else
+strtoIdd(CONST char *s, char **sp, double *f0, double *f1)
+#endif
+{
+#ifdef Sudden_Underflow
+	static FPI fpi = { 106, 1-1023, 2046-1023-106+1, 1, 1 };
+#else
+	static FPI fpi = { 106, 1-1023-53+1, 2046-1023-106+1, 1, 0 };
+#endif
+	Long exp[2];
+	Bigint *B[2];
+	int k, rv[2];
+	/* Run strtoIg with a 106-bit FPI and convert its results via ULtodd; f0 and f1 are each accessed as four ULong words. */
+	B[0] = Balloc(2);
+	B[0]->wds = 4;
+	k = strtoIg(s, sp, &fpi, exp, B, rv);
+	ULtodd((ULong*)f0, B[0]->x, exp[0], rv[0]);
+	Bfree(B[0]);
+	if (B[1]) {	/* strtoIg produced a second value */
+		ULtodd((ULong*)f1, B[1]->x, exp[1], rv[1]);
+		Bfree(B[1]);
+		}
+	else {	/* no second value: copy f0's four words into f1 */
+		((ULong*)f1)[0] = ((ULong*)f0)[0];
+		((ULong*)f1)[1] = ((ULong*)f0)[1];
+		((ULong*)f1)[2] = ((ULong*)f0)[2];
+		((ULong*)f1)[3] = ((ULong*)f0)[3];
+		}
+	return k;	/* strtoIg's return value is passed through */
+	}
diff --git a/libraries/gdtoa/strtoIf.c b/libraries/gdtoa/strtoIf.c
new file mode 100644
index 000000000..65ecab2e0
--- /dev/null
+++ b/libraries/gdtoa/strtoIf.c
@@ -0,0 +1,58 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ int
+#ifdef KR_headers
+strtoIf(s, sp, f0, f1) CONST char *s; char **sp; float *f0, *f1;
+#else
+strtoIf(CONST char *s, char **sp, float *f0, float *f1)
+#endif
+{
+	static FPI fpi = { 24, 1-127-24+1,  254-127-24+1, 1, SI };
+	Long exp[2];
+	Bigint *B[2];
+	int k, rv[2];
+	/* Run strtoIg with a 24-bit FPI and convert its one or two results into *f0 and *f1 via ULtof. */
+	B[0] = Balloc(0);
+	B[0]->wds = 1;
+	k = strtoIg(s, sp, &fpi, exp, B, rv);
+	ULtof((ULong*)f0, B[0]->x, exp[0], rv[0]);
+	Bfree(B[0]);
+	if (B[1]) {	/* strtoIg produced a second value */
+		ULtof((ULong*)f1, B[1]->x, exp[1], rv[1]);
+		Bfree(B[1]);
+		}
+	else	/* no second value: *f1 gets the same word as *f0 */
+		*(ULong*)f1 = *(ULong*)f0;
+	return k;	/* strtoIg's return value is passed through */
+	}
diff --git a/libraries/gdtoa/strtoIg.c b/libraries/gdtoa/strtoIg.c
new file mode 100644
index 000000000..6a17760cf
--- /dev/null
+++ b/libraries/gdtoa/strtoIg.c
@@ -0,0 +1,137 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ int
+#ifdef KR_headers
+strtoIg(s00, se, fpi, exp, B, rvp) CONST char *s00; char **se; FPI *fpi; Long *exp; Bigint **B; int *rvp;
+#else
+strtoIg(CONST char *s00, char **se, FPI *fpi, Long *exp, Bigint **B, int *rvp)
+#endif
+{
+	Bigint *b, *b1;
+	int i, nb, nw, nw1, rv, rv1, swap;
+	unsigned int nb1, nb11;
+	Long e1;
+	/* Convert s00 with strtodg into *B; if the result is inexact, also build the adjacent value (one step up or down) as a second Bigint. */
+	b = *B;
+	rv = strtodg(s00, se, fpi, exp, b->x);
+	if (!(rv & STRTOG_Inexact)) {	/* exact: no second value, B[1] is cleared */
+		B[1] = 0;
+		return *rvp = rv;
+		}
+	e1 = exp[0];
+	rv1 = rv ^ STRTOG_Inexact;	/* flips the Inexact bits for the second value's status */
+	b1 = Balloc(b->k);
+	Bcopy(b1, b);
+	nb = fpi->nbits;
+	nb1 = nb & 31;	/* bits used in the top word, 0 when nbits is a multiple of 32 */
+	nb11 = (nb1 - 1) & 31;	/* position of the top significand bit in word nw1 */
+	nw = b->wds;
+	nw1 = nw - 1;
+	if (rv & STRTOG_Inexlo) {	/* Inexlo set: the second value is b incremented */
+		swap = 0;
+		b1 = increment(b1);
+		if ((rv & STRTOG_Retmask) == STRTOG_Zero) {
+			if (fpi->sudden_underflow) {
+				b1->x[0] = 0;
+				b1->x[nw1] = (ULong)1 << nb11;	/* unsigned mask: 1L << 31 is undefined where long is 32 bits */
+				rv1 += STRTOG_Normal - STRTOG_Zero;
+				rv1 &= ~STRTOG_Underflow;
+				goto swapcheck;
+				}
+			rv1 &= STRTOG_Inexlo | STRTOG_Underflow | STRTOG_Zero;
+			rv1 |= STRTOG_Inexhi | STRTOG_Denormal;
+			goto swapcheck;
+			}
+		if (b1->wds > nw
+		 || (nb1 && b1->x[nw1] & (ULong)1 << nb1)) {	/* increment carried past nbits */
+			if (++e1 > fpi->emax)
+				rv1 = STRTOG_Infinite | STRTOG_Inexhi;
+			rshift(b1, 1);
+			}
+		else if ((rv & STRTOG_Retmask) == STRTOG_Denormal) {
+			if (b1->x[nw1] & (ULong)1 << nb11) {	/* top significand bit now set: denormal became normal */
+				rv1 += STRTOG_Normal - STRTOG_Denormal;
+				rv1 &= ~STRTOG_Underflow;
+				}
+			}
+		}
+	else {	/* otherwise the second value is b decremented */
+		swap = STRTOG_Neg;
+		if ((rv & STRTOG_Retmask) == STRTOG_Infinite) {
+			b1 = set_ones(b1, nb);
+			e1 = fpi->emax;
+			rv1 = STRTOG_Normal | STRTOG_Inexlo;
+			goto swapcheck;
+			}
+		decrement(b1);
+		if ((rv & STRTOG_Retmask) == STRTOG_Denormal) {
+			for(i = nw1; !b1->x[i]; --i)	/* every word zero after the decrement means the value is zero */
+				if (!i) {
+					rv1 = STRTOG_Zero | STRTOG_Inexlo;
+					break;
+					}
+			goto swapcheck;
+			}
+		if (!(b1->x[nw1] & (ULong)1 << nb11)) {	/* decrement cleared the top significand bit */
+			if (e1 == fpi->emin) {
+				if (fpi->sudden_underflow)
+					rv1 += STRTOG_Zero - STRTOG_Normal;
+				else
+					rv1 += STRTOG_Denormal - STRTOG_Normal;
+				rv1 |= STRTOG_Underflow;
+				}
+			else {
+				b1 = lshift(b1, 1);
+				b1->x[0] |= 1;
+				--e1;
+				}
+			}
+		}
+ swapcheck:
+	if (swap ^ (rv & STRTOG_Neg)) {	/* second value goes in slot 0 and the strtodg result in slot 1 */
+		rvp[0] = rv1;
+		rvp[1] = rv;
+		B[0] = b1;
+		B[1] = b;
+		exp[1] = exp[0];
+		exp[0] = e1;
+		}
+	else {	/* strtodg result stays in slot 0; second value goes in slot 1 */
+		rvp[0] = rv;
+		rvp[1] = rv1;
+		B[1] = b1;
+		exp[1] = e1;
+		}
+	return rv;
+	}
diff --git a/libraries/gdtoa/strtoIx.c b/libraries/gdtoa/strtoIx.c
new file mode 100644
index 000000000..783a631f0
--- /dev/null
+++ b/libraries/gdtoa/strtoIx.c
@@ -0,0 +1,64 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ int
+#ifdef KR_headers
+strtoIx(s, sp, a, b) CONST char *s; char **sp; void *a; void *b;
+#else
+strtoIx(CONST char *s, char **sp, void *a, void *b)
+#endif
+{
+	static FPI fpi = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, SI };
+	Long exp[2];
+	Bigint *B[2];
+	int k, rv[2];
+	UShort *L = (UShort *)a, *M = (UShort *)b;
+	/* Run strtoIg with a 64-bit FPI and convert its results via ULtox; a and b are each accessed as five UShort words. */
+	B[0] = Balloc(1);
+	B[0]->wds = 2;
+	k = strtoIg(s, sp, &fpi, exp, B, rv);
+	ULtox(L, B[0]->x, exp[0], rv[0]);
+	Bfree(B[0]);
+	if (B[1]) {	/* strtoIg produced a second value */
+		ULtox(M, B[1]->x, exp[1], rv[1]);
+		Bfree(B[1]);
+		}
+	else {	/* no second value: copy a's five UShort words into b */
+		M[0] = L[0];
+		M[1] = L[1];
+		M[2] = L[2];
+		M[3] = L[3];
+		M[4] = L[4];
+		}
+	return k;	/* strtoIg's return value is passed through */
+	}
diff --git a/libraries/gdtoa/strtoIxL.c b/libraries/gdtoa/strtoIxL.c
new file mode 100644
index 000000000..869bfd16f
--- /dev/null
+++ b/libraries/gdtoa/strtoIxL.c
@@ -0,0 +1,62 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ int
+#ifdef KR_headers
+strtoIxL(s, sp, a, b) CONST char *s; char **sp; void *a; void *b;
+#else
+strtoIxL(CONST char *s, char **sp, void *a, void *b)
+#endif
+{
+	static FPI fpi = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, SI };
+	Long exp[2];
+	Bigint *B[2];
+	int k, rv[2];
+	ULong *L = (ULong *)a, *M = (ULong *)b;
+	/* Run strtoIg with a 64-bit FPI and convert its results via ULtoxL; a and b are each accessed as three ULong words. */
+	B[0] = Balloc(1);
+	B[0]->wds = 2;
+	k = strtoIg(s, sp, &fpi, exp, B, rv);
+	ULtoxL(L, B[0]->x, exp[0], rv[0]);
+	Bfree(B[0]);
+	if (B[1]) {	/* strtoIg produced a second value */
+		ULtoxL(M, B[1]->x, exp[1], rv[1]);
+		Bfree(B[1]);
+		}
+	else {	/* no second value: copy a's three ULong words into b */
+		M[0] = L[0];
+		M[1] = L[1];
+		M[2] = L[2];
+		}
+	return k;	/* strtoIg's return value is passed through */
+	}
diff --git a/libraries/gdtoa/strtod.c b/libraries/gdtoa/strtod.c
new file mode 100644
index 000000000..3c2230053
--- /dev/null
+++ b/libraries/gdtoa/strtod.c
@@ -0,0 +1,1074 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998-2001 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+#if !defined(NO_FENV_H) && !defined(_MSC_VER)
+#include <fenv.h>
+#endif
+
+#ifdef USE_LOCALE
+#include "locale.h"
+#endif
+
+#ifdef IEEE_Arith
+#ifndef NO_IEEE_Scale
+#define Avoid_Underflow	/* enables the code paths guarded by Avoid_Underflow below */
+#undef tinytens
+/* The factor of 2^106 in tinytens[4] helps us avoid setting the underflow */
+/* flag unnecessarily.  It leads to a song and dance at the end of strtod. */
+static CONST double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128,
+		9007199254740992.*9007199254740992.e-256	/* 2^53 * 2^53 * 1e-256 */
+		};
+#endif
+#endif
+
+#ifdef Honor_FLT_ROUNDS
+#undef Check_FLT_ROUNDS
+#define Check_FLT_ROUNDS
+#else
+#define Rounding Flt_Rounds	/* no local Rounding variable: the name expands to Flt_Rounds */
+#endif
+
+#ifdef Avoid_Underflow /*{*/
+ static double
+sulp
+#ifdef KR_headers
+	(x, scale) U *x; int scale;
+#else
+	(U *x, int scale)
+#endif
+{	/* Returns ulp(x), multiplied by an extra factor when scale is nonzero and shift > 0. */
+	U factor;
+	double unit = ulp(x);
+	int shift = scale ? 2*P + 1 - ((word0(x) & Exp_mask) >> Exp_shift) : 0;
+
+	if (shift <= 0)
+		return unit; /* Is there an example where shift <= 0 (with scale set) ? */
+	/* Build the factor directly from its two words (presumably 2^shift -- confirm). */
+	word0(&factor) = Exp_1 + (shift << Exp_shift);
+	word1(&factor) = 0;
+	return unit * factor.d;
+	}
+#endif /*}*/
+
+ double	/* the converted value; errno is set to ERANGE on overflow or underflow unless NO_ERRNO is defined */
+strtod	/* s00: text to convert; se: when nonnull, *se is set to the first character not consumed */
+#ifdef KR_headers
+	(s00, se) CONST char *s00; char **se;
+#else
+	(CONST char *s00, char **se)
+#endif
+{
+#ifdef Avoid_Underflow
+	int scale;	/* 0, or 2*P once the tinytens scaling below has been applied to rv */
+#endif
+	int bb2, bb5, bbe, bd2, bd5, bbbits, bs2, c, decpt, dsign,
+		 e, e1, esign, i, j, k, nd, nd0, nf, nz, nz0, sign;
+	CONST char *s, *s0, *s1;
+	double aadj;
+	Long L;
+	U adj, aadj1, rv, rv0;
+	ULong y, z;
+	Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
+#ifdef Avoid_Underflow
+	ULong Lsb, Lsb1;
+#endif
+#ifdef SET_INEXACT
+	int inexact, oldinexact;
+#endif
+#ifdef USE_LOCALE /*{{*/
+#ifdef NO_LOCALE_CACHE
+	char *decimalpoint = localeconv()->decimal_point;
+	int dplen = strlen(decimalpoint);
+#else
+	char *decimalpoint;
+	static char *decimalpoint_cache;
+	static int dplen;
+	if (!(s0 = decimalpoint_cache)) {
+		s0 = localeconv()->decimal_point;
+		if ((decimalpoint_cache = (char*)MALLOC(strlen(s0) + 1))) {
+			strcpy(decimalpoint_cache, s0);
+			s0 = decimalpoint_cache;
+			}
+		dplen = strlen(s0);
+		}
+	decimalpoint = (char*)s0;
+#endif /*NO_LOCALE_CACHE*/
+#else  /*USE_LOCALE}{*/
+#define dplen 1
+#endif /*USE_LOCALE}}*/
+
+#ifdef Honor_FLT_ROUNDS /*{*/
+	int Rounding;
+#ifdef Trust_FLT_ROUNDS /*{{ only define this if FLT_ROUNDS really works! */
+	Rounding = Flt_Rounds;
+#else /*}{*/
+	Rounding = 1;
+	switch(fegetround()) {
+	  case FE_TOWARDZERO:	Rounding = 0; break;
+	  case FE_UPWARD:	Rounding = 2; break;
+	  case FE_DOWNWARD:	Rounding = 3;
+	  }
+#endif /*}}*/
+#endif /*}*/
+
+	sign = nz0 = nz = decpt = 0;
+	dval(&rv) = 0.;
+	for(s = s00;;s++) switch(*s) {	/* skip leading white space and take an optional sign */
+		case '-':
+			sign = 1;
+			/* no break */
+		case '+':
+			if (*++s)
+				goto break2;
+			/* no break */
+		case 0:
+			goto ret0;
+		case '\t':
+		case '\n':
+		case '\v':
+		case '\f':
+		case '\r':
+		case ' ':
+			continue;
+		default:
+			goto break2;
+		}
+ break2:
+	if (*s == '0') {	/* leading zero: either a hex literal or zeros to skip */
+#ifndef NO_HEX_FP /*{*/
+		{
+		static FPI fpi = { 53, 1-1023-53+1, 2046-1023-53+1, 1, SI };
+		Long exp;
+		ULong bits[2];
+		switch(s[1]) {
+		  case 'x':
+		  case 'X':
+			{
+#ifdef Honor_FLT_ROUNDS
+			FPI fpi1 = fpi;
+			fpi1.rounding = Rounding;
+#else
+#define fpi1 fpi
+#endif
+			switch((i = gethex(&s, &fpi1, &exp, &bb, sign)) & STRTOG_Retmask) {
+			  case STRTOG_NoNumber:
+				s = s00;
+				sign = 0;	/* no break */
+			  case STRTOG_Zero:
+				break;
+			  default:
+				if (bb) {
+					copybits(bits, fpi.nbits, bb);
+					Bfree(bb);
+					}
+				ULtod(((U*)&rv)->L, bits, exp, i);
+			  }}
+			goto ret;
+		  }
+		}
+#endif /*}*/
+		nz0 = 1;
+		while(*++s == '0') ;	/* skip the remaining leading zeros */
+		if (!*s)
+			goto ret;
+		}
+	s0 = s;
+	y = z = 0;
+	for(nd = nf = 0; (c = *s) >= '0' && c <= '9'; nd++, s++)	/* digits 1-9 go to y, digits 10-16 to z */
+		if (nd < 9)
+			y = 10*y + c - '0';
+		else if (nd < 16)
+			z = 10*z + c - '0';
+	nd0 = nd;	/* number of digits seen before any decimal point */
+#ifdef USE_LOCALE
+	if (c == *decimalpoint) {
+		for(i = 1; decimalpoint[i]; ++i)
+			if (s[i] != decimalpoint[i])
+				goto dig_done;
+		s += i;
+		c = *s;
+#else
+	if (c == '.') {
+		c = *++s;
+#endif
+		decpt = 1;
+		if (!nd) {
+			for(; c == '0'; c = *++s)
+				nz++;
+			if (c > '0' && c <= '9') {
+				s0 = s;
+				nf += nz;
+				nz = 0;
+				goto have_dig;
+				}
+			goto dig_done;
+			}
+		for(; c >= '0' && c <= '9'; c = *++s) {
+ have_dig:
+			nz++;
+			if (c -= '0') {
+				nf += nz;
+				for(i = 1; i < nz; i++)
+					if (nd++ < 9)
+						y *= 10;
+					else if (nd <= DBL_DIG + 1)
+						z *= 10;
+				if (nd++ < 9)
+					y = 10*y + c;
+				else if (nd <= DBL_DIG + 1)
+					z = 10*z + c;
+				nz = 0;
+				}
+			}
+		}/*}*/
+ dig_done:
+	e = 0;
+	if (c == 'e' || c == 'E') {
+		if (!nd && !nz && !nz0) {
+			goto ret0;
+			}
+		s00 = s;	/* remember the 'e' position; s is reset to it if no exponent digits follow */
+		esign = 0;
+		switch(c = *++s) {
+			case '-':
+				esign = 1;	/* no break */
+			case '+':
+				c = *++s;
+			}
+		if (c >= '0' && c <= '9') {
+			while(c == '0')
+				c = *++s;
+			if (c > '0' && c <= '9') {
+				L = c - '0';
+				s1 = s;
+				while((c = *++s) >= '0' && c <= '9')
+					L = L > 19999 ? L : 10*L + c - '0';	/* saturate: L is out of range already, and must not overflow */
+				if (s - s1 > 8 || L > 19999)
+					/* Avoid confusion from exponents
+					 * so large that e might overflow.
+					 */
+					e = 19999; /* safe for 16 bit ints */
+				else
+					e = (int)L;
+				if (esign)
+					e = -e;
+				}
+			else
+				e = 0;
+			}
+		else
+			s = s00;
+		}
+	if (!nd) {	/* no nonzero digit was seen */
+		if (!nz && !nz0) {
+#ifdef INFNAN_CHECK
+			/* Check for Nan and Infinity */
+			ULong bits[2];
+			static FPI fpinan =	/* only 52 explicit bits */
+				{ 52, 1-1023-53+1, 2046-1023-53+1, 1, SI };
+			if (!decpt)
+			 switch(c) {
+			  case 'i':
+			  case 'I':
+				if (match(&s,"nf")) {
+					--s;
+					if (!match(&s,"inity"))
+						++s;
+					word0(&rv) = 0x7ff00000;
+					word1(&rv) = 0;
+					goto ret;
+					}
+				break;
+			  case 'n':
+			  case 'N':
+				if (match(&s, "an")) {
+#ifndef No_Hex_NaN
+					if (*s == '(' /*)*/
+					 && hexnan(&s, &fpinan, bits)
+							== STRTOG_NaNbits) {
+						word0(&rv) = 0x7ff00000 | bits[1];
+						word1(&rv) = bits[0];
+						}
+					else {
+#endif
+						word0(&rv) = NAN_WORD0;
+						word1(&rv) = NAN_WORD1;
+#ifndef No_Hex_NaN
+						}
+#endif
+					goto ret;
+					}
+			  }
+#endif /* INFNAN_CHECK */
+ ret0:
+			s = s00;
+			sign = 0;
+			}
+		goto ret;
+		}
+	e1 = e -= nf;	/* the nf counted fraction digits are treated as integer digits, so lower the exponent */
+
+	/* Now we have nd0 digits, starting at s0, followed by a
+	 * decimal point, followed by nd-nd0 digits.  The number we're
+	 * after is the integer represented by those digits times
+	 * 10**e */
+
+	if (!nd0)
+		nd0 = nd;
+	k = nd < DBL_DIG + 1 ? nd : DBL_DIG + 1;
+	dval(&rv) = y;
+	if (k > 9) {
+#ifdef SET_INEXACT
+		if (k > DBL_DIG)
+			oldinexact = get_inexact();
+#endif
+		dval(&rv) = tens[k - 9] * dval(&rv) + z;
+		}
+	bd0 = 0;	/* no Bigints allocated yet; range_err tests this before freeing */
+	if (nd <= DBL_DIG
+#ifndef RND_PRODQUOT
+#ifndef Honor_FLT_ROUNDS
+		&& Flt_Rounds == 1
+#endif
+#endif
+			) {
+		if (!e)
+			goto ret;
+#ifndef ROUND_BIASED_without_Round_Up
+		if (e > 0) {
+			if (e <= Ten_pmax) {
+#ifdef VAX
+				goto vax_ovfl_check;
+#else
+#ifdef Honor_FLT_ROUNDS
+				/* round correctly FLT_ROUNDS = 2 or 3 */
+				if (sign) {
+					rv.d = -rv.d;
+					sign = 0;
+					}
+#endif
+				/* rv = */ rounded_product(dval(&rv), tens[e]);
+				goto ret;
+#endif
+				}
+			i = DBL_DIG - nd;
+			if (e <= Ten_pmax + i) {
+				/* A fancier test would sometimes let us do
+				 * this for larger i values.
+				 */
+#ifdef Honor_FLT_ROUNDS
+				/* round correctly FLT_ROUNDS = 2 or 3 */
+				if (sign) {
+					rv.d = -rv.d;
+					sign = 0;
+					}
+#endif
+				e -= i;
+				dval(&rv) *= tens[i];
+#ifdef VAX
+				/* VAX exponent range is so narrow we must
+				 * worry about overflow here...
+				 */
+ vax_ovfl_check:
+				word0(&rv) -= P*Exp_msk1;
+				/* rv = */ rounded_product(dval(&rv), tens[e]);
+				if ((word0(&rv) & Exp_mask)
+				 > Exp_msk1*(DBL_MAX_EXP+Bias-1-P))
+					goto ovfl;
+				word0(&rv) += P*Exp_msk1;
+#else
+				/* rv = */ rounded_product(dval(&rv), tens[e]);
+#endif
+				goto ret;
+				}
+			}
+#ifndef Inaccurate_Divide
+		else if (e >= -Ten_pmax) {
+#ifdef Honor_FLT_ROUNDS
+			/* round correctly FLT_ROUNDS = 2 or 3 */
+			if (sign) {
+				rv.d = -rv.d;
+				sign = 0;
+				}
+#endif
+			/* rv = */ rounded_quotient(dval(&rv), tens[-e]);
+			goto ret;
+			}
+#endif
+#endif /* ROUND_BIASED_without_Round_Up */
+		}
+	e1 += nd - k;
+
+#ifdef IEEE_Arith
+#ifdef SET_INEXACT
+	inexact = 1;
+	if (k <= DBL_DIG)
+		oldinexact = get_inexact();
+#endif
+#ifdef Avoid_Underflow
+	scale = 0;
+#endif
+#ifdef Honor_FLT_ROUNDS
+	if (Rounding >= 2) {
+		if (sign)
+			Rounding = Rounding == 2 ? 0 : 2;
+		else
+			if (Rounding != 2)
+				Rounding = 0;
+		}
+#endif
+#endif /*IEEE_Arith*/
+
+	/* Get starting approximation = rv * 10**e1 */
+
+	if (e1 > 0) {
+		if ( (i = e1 & 15) !=0)
+			dval(&rv) *= tens[i];
+		if (e1 &= ~15) {
+			if (e1 > DBL_MAX_10_EXP) {
+ ovfl:
+				/* Can't trust HUGE_VAL */
+#ifdef IEEE_Arith
+#ifdef Honor_FLT_ROUNDS
+				switch(Rounding) {
+				  case 0: /* toward 0 */
+				  case 3: /* toward -infinity */
+					word0(&rv) = Big0;
+					word1(&rv) = Big1;
+					break;
+				  default:
+					word0(&rv) = Exp_mask;
+					word1(&rv) = 0;
+				  }
+#else /*Honor_FLT_ROUNDS*/
+				word0(&rv) = Exp_mask;
+				word1(&rv) = 0;
+#endif /*Honor_FLT_ROUNDS*/
+#ifdef SET_INEXACT
+				/* set overflow bit */
+				dval(&rv0) = 1e300;
+				dval(&rv0) *= dval(&rv0);
+#endif
+#else /*IEEE_Arith*/
+				word0(&rv) = Big0;
+				word1(&rv) = Big1;
+#endif /*IEEE_Arith*/
+ range_err:
+				if (bd0) {
+					Bfree(bb);
+					Bfree(bd);
+					Bfree(bs);
+					Bfree(bd0);
+					Bfree(delta);
+					}
+#ifndef NO_ERRNO
+				errno = ERANGE;
+#endif
+				goto ret;
+				}
+			e1 >>= 4;
+			for(j = 0; e1 > 1; j++, e1 >>= 1)
+				if (e1 & 1)
+					dval(&rv) *= bigtens[j];
+		/* The last multiplication could overflow. */
+			word0(&rv) -= P*Exp_msk1;
+			dval(&rv) *= bigtens[j];
+			if ((z = word0(&rv) & Exp_mask)
+			 > Exp_msk1*(DBL_MAX_EXP+Bias-P))
+				goto ovfl;
+			if (z > Exp_msk1*(DBL_MAX_EXP+Bias-1-P)) {
+				/* set to largest number */
+				/* (Can't trust DBL_MAX) */
+				word0(&rv) = Big0;
+				word1(&rv) = Big1;
+				}
+			else
+				word0(&rv) += P*Exp_msk1;
+			}
+		}
+	else if (e1 < 0) {
+		e1 = -e1;
+		if ( (i = e1 & 15) !=0)
+			dval(&rv) /= tens[i];
+		if (e1 >>= 4) {
+			if (e1 >= 1 << n_bigtens)
+				goto undfl;
+#ifdef Avoid_Underflow
+			if (e1 & Scale_Bit)
+				scale = 2*P;
+			for(j = 0; e1 > 0; j++, e1 >>= 1)
+				if (e1 & 1)
+					dval(&rv) *= tinytens[j];
+			if (scale && (j = 2*P + 1 - ((word0(&rv) & Exp_mask)
+						>> Exp_shift)) > 0) {
+				/* scaled rv is denormal; zap j low bits */
+				if (j >= 32) {
+					word1(&rv) = 0;
+					if (j >= 53)
+					 word0(&rv) = (P+2)*Exp_msk1;
+					else
+					 word0(&rv) &= 0xffffffff << (j-32);
+					}
+				else
+					word1(&rv) &= 0xffffffff << j;
+				}
+#else
+			for(j = 0; e1 > 1; j++, e1 >>= 1)
+				if (e1 & 1)
+					dval(&rv) *= tinytens[j];
+			/* The last multiplication could underflow. */
+			dval(&rv0) = dval(&rv);
+			dval(&rv) *= tinytens[j];
+			if (!dval(&rv)) {
+				dval(&rv) = 2.*dval(&rv0);
+				dval(&rv) *= tinytens[j];
+#endif
+				if (!dval(&rv)) {
+ undfl:
+					dval(&rv) = 0.;
+					goto range_err;
+					}
+#ifndef Avoid_Underflow
+				word0(&rv) = Tiny0;
+				word1(&rv) = Tiny1;
+				/* The refinement below will clean
+				 * this approximation up.
+				 */
+				}
+#endif
+			}
+		}
+
+	/* Now the hard part -- adjusting rv to the correct value.*/
+
+	/* Put digits into bd: true value = bd * 10^e */
+
+	bd0 = s2b(s0, nd0, nd, y, dplen);
+
+	for(;;) {	/* correction loop: left via break when rv is accepted, repeated via cont otherwise */
+		bd = Balloc(bd0->k);
+		Bcopy(bd, bd0);
+		bb = d2b(dval(&rv), &bbe, &bbbits);	/* rv = bb * 2^bbe */
+		bs = i2b(1);
+
+		if (e >= 0) {
+			bb2 = bb5 = 0;
+			bd2 = bd5 = e;
+			}
+		else {
+			bb2 = bb5 = -e;
+			bd2 = bd5 = 0;
+			}
+		if (bbe >= 0)
+			bb2 += bbe;
+		else
+			bd2 -= bbe;
+		bs2 = bb2;
+#ifdef Honor_FLT_ROUNDS
+		if (Rounding != 1)
+			bs2++;
+#endif
+#ifdef Avoid_Underflow
+		Lsb = LSB;
+		Lsb1 = 0;
+		j = bbe - scale;
+		i = j + bbbits - 1;	/* logb(rv) */
+		j = P + 1 - bbbits;
+		if (i < Emin) {	/* denormal */
+			i = Emin - i;
+			j -= i;
+			if (i < 32)
+				Lsb <<= i;
+			else
+				Lsb1 = Lsb << (i-32);
+			}
+#else /*Avoid_Underflow*/
+#ifdef Sudden_Underflow
+#ifdef IBM
+		j = 1 + 4*P - 3 - bbbits + ((bbe + bbbits - 1) & 3);
+#else
+		j = P + 1 - bbbits;
+#endif
+#else /*Sudden_Underflow*/
+		j = bbe;
+		i = j + bbbits - 1;	/* logb(&rv) */
+		if (i < Emin)	/* denormal */
+			j += P - Emin;
+		else
+			j = P + 1 - bbbits;
+#endif /*Sudden_Underflow*/
+#endif /*Avoid_Underflow*/
+		bb2 += j;
+		bd2 += j;
+#ifdef Avoid_Underflow
+		bd2 += scale;
+#endif
+		i = bb2 < bd2 ? bb2 : bd2;
+		if (i > bs2)
+			i = bs2;
+		if (i > 0) {
+			bb2 -= i;
+			bd2 -= i;
+			bs2 -= i;
+			}
+		if (bb5 > 0) {
+			bs = pow5mult(bs, bb5);
+			bb1 = mult(bs, bb);
+			Bfree(bb);
+			bb = bb1;
+			}
+		if (bb2 > 0)
+			bb = lshift(bb, bb2);
+		if (bd5 > 0)
+			bd = pow5mult(bd, bd5);
+		if (bd2 > 0)
+			bd = lshift(bd, bd2);
+		if (bs2 > 0)
+			bs = lshift(bs, bs2);
+		delta = diff(bb, bd);
+		dsign = delta->sign;
+		delta->sign = 0;
+		i = cmp(delta, bs);
+#ifdef Honor_FLT_ROUNDS
+		if (Rounding != 1) {
+			if (i < 0) {
+				/* Error is less than an ulp */
+				if (!delta->x[0] && delta->wds <= 1) {
+					/* exact */
+#ifdef SET_INEXACT
+					inexact = 0;
+#endif
+					break;
+					}
+				if (Rounding) {
+					if (dsign) {
+						dval(&adj) = 1.;
+						goto apply_adj;
+						}
+					}
+				else if (!dsign) {
+					dval(&adj) = -1.;
+					if (!word1(&rv)
+					 && !(word0(&rv) & Frac_mask)) {
+						y = word0(&rv) & Exp_mask;
+#ifdef Avoid_Underflow
+						if (!scale || y > 2*P*Exp_msk1)
+#else
+						if (y)
+#endif
+						  {
+						  delta = lshift(delta,Log2P);
+						  if (cmp(delta, bs) <= 0)
+							dval(&adj) = -0.5;
+						  }
+						}
+ apply_adj:
+#ifdef Avoid_Underflow
+					if (scale && (y = word0(&rv) & Exp_mask)
+						<= 2*P*Exp_msk1)
+					  word0(&adj) += (2*P+1)*Exp_msk1 - y;
+#else
+#ifdef Sudden_Underflow
+					if ((word0(&rv) & Exp_mask) <=
+							P*Exp_msk1) {
+						word0(&rv) += P*Exp_msk1;
+						dval(&rv) += adj.d*ulp(&rv);	/* adj is a U: use its double member */
+						word0(&rv) -= P*Exp_msk1;
+						}
+					else
+#endif /*Sudden_Underflow*/
+#endif /*Avoid_Underflow*/
+					dval(&rv) += adj.d*ulp(&rv);
+					}
+				break;
+				}
+			dval(&adj) = ratio(delta, bs);
+			if (adj.d < 1.)
+				dval(&adj) = 1.;
+			if (adj.d <= 0x7ffffffe) {
+				/* dval(&adj) = Rounding ? ceil(&adj) : floor(&adj); */
+				y = adj.d;
+				if (y != adj.d) {
+					if (!((Rounding>>1) ^ dsign))
+						y++;
+					dval(&adj) = y;
+					}
+				}
+#ifdef Avoid_Underflow
+			if (scale && (y = word0(&rv) & Exp_mask) <= 2*P*Exp_msk1)
+				word0(&adj) += (2*P+1)*Exp_msk1 - y;
+#else
+#ifdef Sudden_Underflow
+			if ((word0(&rv) & Exp_mask) <= P*Exp_msk1) {
+				word0(&rv) += P*Exp_msk1;
+				dval(&adj) *= ulp(&rv);
+				if (dsign)
+					dval(&rv) += adj.d;
+				else
+					dval(&rv) -= adj.d;
+				word0(&rv) -= P*Exp_msk1;
+				goto cont;
+				}
+#endif /*Sudden_Underflow*/
+#endif /*Avoid_Underflow*/
+			dval(&adj) *= ulp(&rv);
+			if (dsign) {
+				if (word0(&rv) == Big0 && word1(&rv) == Big1)
+					goto ovfl;
+				dval(&rv) += adj.d;
+				}
+			else
+				dval(&rv) -= adj.d;
+			goto cont;
+			}
+#endif /*Honor_FLT_ROUNDS*/
+
+		if (i < 0) {
+			/* Error is less than half an ulp -- check for
+			 * special case of mantissa a power of two.
+			 */
+			if (dsign || word1(&rv) || word0(&rv) & Bndry_mask
+#ifdef IEEE_Arith
+#ifdef Avoid_Underflow
+			 || (word0(&rv) & Exp_mask) <= (2*P+1)*Exp_msk1
+#else
+			 || (word0(&rv) & Exp_mask) <= Exp_msk1
+#endif
+#endif
+				) {
+#ifdef SET_INEXACT
+				if (!delta->x[0] && delta->wds <= 1)
+					inexact = 0;
+#endif
+				break;
+				}
+			if (!delta->x[0] && delta->wds <= 1) {
+				/* exact result */
+#ifdef SET_INEXACT
+				inexact = 0;
+#endif
+				break;
+				}
+			delta = lshift(delta,Log2P);
+			if (cmp(delta, bs) > 0)
+				goto drop_down;
+			break;
+			}
+		if (i == 0) {
+			/* exactly half-way between */
+			if (dsign) {
+				if ((word0(&rv) & Bndry_mask1) == Bndry_mask1
+				 &&  word1(&rv) == (
+#ifdef Avoid_Underflow
+			(scale && (y = word0(&rv) & Exp_mask) <= 2*P*Exp_msk1)
+		? (0xffffffff & (0xffffffff << (2*P+1-(y>>Exp_shift)))) :
+#endif
+						   0xffffffff)) {
+					/*boundary case -- increment exponent*/
+					if (word0(&rv) == Big0 && word1(&rv) == Big1)
+						goto ovfl;
+					word0(&rv) = (word0(&rv) & Exp_mask)
+						+ Exp_msk1
+#ifdef IBM
+						| Exp_msk1 >> 4
+#endif
+						;
+					word1(&rv) = 0;
+#ifdef Avoid_Underflow
+					dsign = 0;
+#endif
+					break;
+					}
+				}
+			else if (!(word0(&rv) & Bndry_mask) && !word1(&rv)) {
+ drop_down:
+				/* boundary case -- decrement exponent */
+#ifdef Sudden_Underflow /*{{*/
+				L = word0(&rv) & Exp_mask;
+#ifdef IBM
+				if (L <  Exp_msk1)
+#else
+#ifdef Avoid_Underflow
+				if (L <= (scale ? (2*P+1)*Exp_msk1 : Exp_msk1))
+#else
+				if (L <= Exp_msk1)
+#endif /*Avoid_Underflow*/
+#endif /*IBM*/
+					goto undfl;
+				L -= Exp_msk1;
+#else /*Sudden_Underflow}{*/
+#ifdef Avoid_Underflow
+				if (scale) {
+					L = word0(&rv) & Exp_mask;
+					if (L <= (2*P+1)*Exp_msk1) {
+						if (L > (P+2)*Exp_msk1)
+							/* round even ==> */
+							/* accept rv */
+							break;
+						/* rv = smallest denormal */
+						goto undfl;
+						}
+					}
+#endif /*Avoid_Underflow*/
+				L = (word0(&rv) & Exp_mask) - Exp_msk1;
+#endif /*Sudden_Underflow}}*/
+				word0(&rv) = L | Bndry_mask1;
+				word1(&rv) = 0xffffffff;
+#ifdef IBM
+				goto cont;
+#else
+				break;
+#endif
+				}
+#ifndef ROUND_BIASED
+#ifdef Avoid_Underflow
+			if (Lsb1) {
+				if (!(word0(&rv) & Lsb1))
+					break;
+				}
+			else if (!(word1(&rv) & Lsb))
+				break;
+#else
+			if (!(word1(&rv) & LSB))
+				break;
+#endif
+#endif
+			if (dsign)
+#ifdef Avoid_Underflow
+				dval(&rv) += sulp(&rv, scale);
+#else
+				dval(&rv) += ulp(&rv);
+#endif
+#ifndef ROUND_BIASED
+			else {
+#ifdef Avoid_Underflow
+				dval(&rv) -= sulp(&rv, scale);
+#else
+				dval(&rv) -= ulp(&rv);
+#endif
+#ifndef Sudden_Underflow
+				if (!dval(&rv))
+					goto undfl;
+#endif
+				}
+#ifdef Avoid_Underflow
+			dsign = 1 - dsign;
+#endif
+#endif
+			break;
+			}
+		if ((aadj = ratio(delta, bs)) <= 2.) {
+			if (dsign)
+				aadj = dval(&aadj1) = 1.;
+			else if (word1(&rv) || word0(&rv) & Bndry_mask) {
+#ifndef Sudden_Underflow
+				if (word1(&rv) == Tiny1 && !word0(&rv))
+					goto undfl;
+#endif
+				aadj = 1.;
+				dval(&aadj1) = -1.;
+				}
+			else {
+				/* special case -- power of FLT_RADIX to be */
+				/* rounded down... */
+
+				if (aadj < 2./FLT_RADIX)
+					aadj = 1./FLT_RADIX;
+				else
+					aadj *= 0.5;
+				dval(&aadj1) = -aadj;
+				}
+			}
+		else {
+			aadj *= 0.5;
+			dval(&aadj1) = dsign ? aadj : -aadj;
+#ifdef Check_FLT_ROUNDS
+			switch(Rounding) {
+				case 2: /* towards +infinity */
+					dval(&aadj1) -= 0.5;
+					break;
+				case 0: /* towards 0 */
+				case 3: /* towards -infinity */
+					dval(&aadj1) += 0.5;
+				}
+#else
+			if (Flt_Rounds == 0)
+				dval(&aadj1) += 0.5;
+#endif /*Check_FLT_ROUNDS*/
+			}
+		y = word0(&rv) & Exp_mask;
+
+		/* Check for overflow */
+
+		if (y == Exp_msk1*(DBL_MAX_EXP+Bias-1)) {
+			dval(&rv0) = dval(&rv);
+			word0(&rv) -= P*Exp_msk1;
+			dval(&adj) = dval(&aadj1) * ulp(&rv);
+			dval(&rv) += dval(&adj);
+			if ((word0(&rv) & Exp_mask) >=
+					Exp_msk1*(DBL_MAX_EXP+Bias-P)) {
+				if (word0(&rv0) == Big0 && word1(&rv0) == Big1)
+					goto ovfl;
+				word0(&rv) = Big0;
+				word1(&rv) = Big1;
+				goto cont;
+				}
+			else
+				word0(&rv) += P*Exp_msk1;
+			}
+		else {
+#ifdef Avoid_Underflow
+			if (scale && y <= 2*P*Exp_msk1) {
+				if (aadj <= 0x7fffffff) {
+					if ((z = (ULong)aadj) <= 0)
+						z = 1;
+					aadj = z;
+					dval(&aadj1) = dsign ? aadj : -aadj;
+					}
+				word0(&aadj1) += (2*P+1)*Exp_msk1 - y;
+				}
+			dval(&adj) = dval(&aadj1) * ulp(&rv);
+			dval(&rv) += dval(&adj);
+#else
+#ifdef Sudden_Underflow
+			if ((word0(&rv) & Exp_mask) <= P*Exp_msk1) {
+				dval(&rv0) = dval(&rv);
+				word0(&rv) += P*Exp_msk1;
+				dval(&adj) = dval(&aadj1) * ulp(&rv);
+				dval(&rv) += adj.d;
+#ifdef IBM
+				if ((word0(&rv) & Exp_mask) <  P*Exp_msk1)
+#else
+				if ((word0(&rv) & Exp_mask) <= P*Exp_msk1)
+#endif
+					{
+					if (word0(&rv0) == Tiny0
+					 && word1(&rv0) == Tiny1)
+						goto undfl;
+					word0(&rv) = Tiny0;
+					word1(&rv) = Tiny1;
+					goto cont;
+					}
+				else
+					word0(&rv) -= P*Exp_msk1;
+				}
+			else {
+				dval(&adj) = dval(&aadj1) * ulp(&rv);
+				dval(&rv) += adj.d;
+				}
+#else /*Sudden_Underflow*/
+			/* Compute dval(&adj) so that the IEEE rounding rules will
+			 * correctly round rv + dval(&adj) in some half-way cases.
+			 * If rv * ulp(&rv) is denormalized (i.e.,
+			 * y <= (P-1)*Exp_msk1), we must adjust aadj to avoid
+			 * trouble from bits lost to denormalization;
+			 * example: 1.2e-307 .
+			 */
+			if (y <= (P-1)*Exp_msk1 && aadj > 1.) {
+				dval(&aadj1) = (double)(int)(aadj + 0.5);
+				if (!dsign)
+					dval(&aadj1) = -dval(&aadj1);
+				}
+			dval(&adj) = dval(&aadj1) * ulp(&rv);
+			dval(&rv) += adj.d;
+#endif /*Sudden_Underflow*/
+#endif /*Avoid_Underflow*/
+			}
+		z = word0(&rv) & Exp_mask;
+#ifndef SET_INEXACT
+#ifdef Avoid_Underflow
+		if (!scale)
+#endif
+		if (y == z) {
+			/* Can we stop now? */
+			L = (Long)aadj;
+			aadj -= L;
+			/* The tolerances below are conservative. */
+			if (dsign || word1(&rv) || word0(&rv) & Bndry_mask) {
+				if (aadj < .4999999 || aadj > .5000001)
+					break;
+				}
+			else if (aadj < .4999999/FLT_RADIX)
+				break;
+			}
+#endif
+ cont:	/* release this pass's Bigints (bd0 is kept) and go around again */
+		Bfree(bb);
+		Bfree(bd);
+		Bfree(bs);
+		Bfree(delta);
+		}
+	Bfree(bb);
+	Bfree(bd);
+	Bfree(bs);
+	Bfree(bd0);
+	Bfree(delta);
+#ifdef SET_INEXACT
+	if (inexact) {
+		if (!oldinexact) {
+			word0(&rv0) = Exp_1 + (70 << Exp_shift);
+			word1(&rv0) = 0;
+			dval(&rv0) += 1.;
+			}
+		}
+	else if (!oldinexact)
+		clear_inexact();
+#endif
+#ifdef Avoid_Underflow
+	if (scale) {
+		word0(&rv0) = Exp_1 - 2*P*Exp_msk1;
+		word1(&rv0) = 0;
+		dval(&rv) *= dval(&rv0);
+#ifndef NO_ERRNO
+		/* try to avoid the bug of testing an 8087 register value */
+#ifdef IEEE_Arith
+		if (!(word0(&rv) & Exp_mask))
+#else
+		if (word0(&rv) == 0 && word1(&rv) == 0)
+#endif
+			errno = ERANGE;
+#endif
+		}
+#endif /* Avoid_Underflow */
+#ifdef SET_INEXACT
+	if (inexact && !(word0(&rv) & Exp_mask)) {
+		/* set underflow bit */
+		dval(&rv0) = 1e-300;
+		dval(&rv0) *= dval(&rv0);
+		}
+#endif
+ ret:	/* common exit: report the stop position and apply the sign */
+	if (se)
+		*se = (char *)s;
+	return sign ? -dval(&rv) : dval(&rv);
+	}
+
diff --git a/libraries/gdtoa/strtodI.c b/libraries/gdtoa/strtodI.c
new file mode 100644
index 000000000..0b7b8a45c
--- /dev/null
+++ b/libraries/gdtoa/strtodI.c
@@ -0,0 +1,163 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ static double
+#ifdef KR_headers
+ulpdown(d) U *d;
+#else
+ulpdown(U *d)
+#endif
+{	/* Returns ulp(d), or half of it in the special case tested below. */
+	double step = ulp(d);
+	ULong top = d->L[_0], low = d->L[_1];
+	int frac_zero = !(low | (top & 0xfffff));
+
+	/* Halve when the low 52 bits are all zero and the 0x7ff00000 field exceeds 0x00100000. */
+	if (frac_zero && (top & 0x7ff00000) > 0x00100000)
+		return 0.5 * step;
+	return step;
+	}
+
+ int	/* returns the strtodg result code k unchanged */
+#ifdef KR_headers
+strtodI(s, sp, dd) CONST char *s; char **sp; double *dd;
+#else
+strtodI(CONST char *s, char **sp, double *dd)
+#endif
+{	/* Parse s with strtodg and store a pair of doubles in dd[0] and dd[1]. */
+	static FPI fpi = { 53, 1-1023-53+1, 2046-1023-53+1, 1, SI };
+	ULong bits[2], sign;
+	Long exp;
+	int j, k;
+	U *u;
+
+	k = strtodg(s, sp, &fpi, &exp, bits);
+	u = (U*)dd;	/* dd is accessed as two consecutive U values, u[0] and u[1] */
+	sign = k & STRTOG_Neg ? 0x80000000L : 0;	/* top bit for word0 when STRTOG_Neg is set in k */
+	switch(k & STRTOG_Retmask) {
+	  case STRTOG_NoNumber:
+		dval(&u[0]) = dval(&u[1]) = 0.;
+		break;
+
+	  case STRTOG_Zero:
+		dval(&u[0]) = dval(&u[1]) = 0.;
+#ifdef Sudden_Underflow
+		if (k & STRTOG_Inexact) {
+			if (sign)
+				word0(&u[0]) = 0x80100000L;
+			else
+				word0(&u[1]) = 0x100000L;
+			}
+		break;
+#else
+		goto contain;	/* shares the inexact-flag handling with the nonzero cases */
+#endif
+
+	  case STRTOG_Denormal:
+		word1(&u[0]) = bits[0];
+		word0(&u[0]) = bits[1];
+		goto contain;
+
+	  case STRTOG_Normal:
+		word1(&u[0]) = bits[0];
+		word0(&u[0]) = (bits[1] & ~0x100000) | ((exp + 0x3ff + 52) << 20);	/* clear bit 20 of bits[1], insert the biased exponent */
+	  contain:	/* u[0] holds the magnitude; now derive u[1] (or lower u[0]) from the inexact flags */
+		j = k & STRTOG_Inexact;
+		if (sign) {
+			word0(&u[0]) |= sign;
+			j = STRTOG_Inexact - j;	/* presumably swaps Inexlo and Inexhi for a negative value -- confirm against the STRTOG_ definitions */
+			}
+		switch(j) {
+		  case STRTOG_Inexlo:
+#ifdef Sudden_Underflow
+			if ((u->L[_0] & 0x7ff00000) < 0x3500000) {
+				word0(&u[1]) = word0(&u[0]) + 0x3500000;
+				word1(&u[1]) = word1(&u[0]);
+				dval(&u[1]) += ulp(&u[1]);
+				word0(&u[1]) -= 0x3500000;
+				if (!(word0(&u[1]) & 0x7ff00000)) {
+					word0(&u[1]) = sign;
+					word1(&u[1]) = 0;
+					}
+				}
+			else
+#endif
+			dval(&u[1]) = dval(&u[0]) + ulp(&u[0]);	/* u[1] is u[0] plus ulp(&u[0]) */
+			break;
+		  case STRTOG_Inexhi:
+			dval(&u[1]) = dval(&u[0]);	/* u[1] keeps the parsed value; u[0] is lowered below */
+#ifdef Sudden_Underflow
+			if ((word0(&u[0]) & 0x7ff00000) < 0x3500000) {
+				word0(&u[0]) += 0x3500000;
+				dval(&u[0]) -= ulpdown(u);
+				word0(&u[0]) -= 0x3500000;
+				if (!(word0(&u[0]) & 0x7ff00000)) {
+					word0(&u[0]) = sign;
+					word1(&u[0]) = 0;
+					}
+				}
+			else
+#endif
+			dval(&u[0]) -= ulpdown(u);
+			break;
+		  default:	/* neither Inexlo nor Inexhi: both entries get the same value */
+			dval(&u[1]) = dval(&u[0]);
+		  }
+		break;
+
+	  case STRTOG_Infinite:
+		word0(&u[0]) = word0(&u[1]) = sign | 0x7ff00000;
+		word1(&u[0]) = word1(&u[1]) = 0;
+		if (k & STRTOG_Inexact) {	/* inexact: one entry is replaced by the 0x7fefffff/0xffffffff bit pattern */
+			if (sign) {
+				word0(&u[1]) = 0xffefffffL;
+				word1(&u[1]) = 0xffffffffL;
+				}
+			else {
+				word0(&u[0]) = 0x7fefffffL;
+				word1(&u[0]) = 0xffffffffL;
+				}
+			}
+		break;
+
+	  case STRTOG_NaN:	/* both entries get the d_QNAN0/d_QNAN1 words; sign is not applied here */
+		u->L[0] = (u+1)->L[0] = d_QNAN0;
+		u->L[1] = (u+1)->L[1] = d_QNAN1;
+		break;
+
+	  case STRTOG_NaNbits:
+		word0(&u[0]) = word0(&u[1]) = 0x7ff00000 | sign | bits[1];
+		word1(&u[0]) = word1(&u[1]) = bits[0];
+	  }
+	return k;
+	}
diff --git a/libraries/gdtoa/strtodg.c b/libraries/gdtoa/strtodg.c
new file mode 100644
index 000000000..c2e3365c7
--- /dev/null
+++ b/libraries/gdtoa/strtodg.c
@@ -0,0 +1,1065 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998-2001 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+#ifdef USE_LOCALE
+#include "locale.h"
+#endif
+
+ static CONST int	/* fivesbits[i] = number of bits needed to hold 5**i (5 -> 3, 25 -> 5, 125 -> 7) */
+fivesbits[] = {	 0,  3,  5,  7, 10, 12, 14, 17, 19, 21,
+		24, 26, 28, 31, 33, 35, 38, 40, 42, 45,
+		47, 49, 52
+#ifdef VAX
+		, 54, 56
+#endif
+		};
+
+ Bigint *	/* increment: add 1 to b in place; the caller must use the returned pointer */
+#ifdef KR_headers
+increment(b) Bigint *b;
+#else
+increment(Bigint *b)
+#endif
+{
+	ULong *x, *xe;
+	Bigint *b1;
+#ifdef Pack_16
+	ULong carry = 1, y;
+#endif
+	/* x walks the words of b starting at b->x[0]; xe is one past the last word in use */
+	x = b->x;
+	xe = x + b->wds;
+#ifdef Pack_32
+	do {
+		if (*x < (ULong)0xffffffffL) {
+			++*x;	/* no carry out of this word: done */
+			return b;
+			}
+		*x++ = 0;	/* word was all ones: it wraps to 0 and the carry moves on */
+		} while(x < xe);
+#else
+	do {
+		y = *x + carry;
+		carry = y >> 16;	/* words are handled as 16-bit quantities in this variant */
+		*x++ = y & 0xffff;
+		if (!carry)
+			return b;
+		} while(x < xe);
+	if (carry)
+#endif
+	{	/* the carry ran past the top word: append a new high word holding 1 */
+		if (b->wds >= b->maxwds) {
+			b1 = Balloc(b->k+1);	/* no spare word: copy into a Bigint from Balloc(k+1), free the old one */
+			Bcopy(b1,b);
+			Bfree(b);
+			b = b1;
+			}
+		b->x[b->wds++] = 1;
+		}
+	return b;
+	}
+
+ void	/* decrement: subtract 1 from b in place; b->wds is not modified */
+#ifdef KR_headers
+decrement(b) Bigint *b;
+#else
+decrement(Bigint *b)
+#endif
+{
+	ULong *x, *xe;
+#ifdef Pack_16
+	ULong borrow = 1, y;
+#endif
+	/* start at b->x[0] and propagate the borrow upward through the words in use */
+	x = b->x;
+	xe = x + b->wds;
+#ifdef Pack_32
+	do {
+		if (*x) {
+			--*x;	/* a nonzero word absorbs the borrow: done */
+			break;
+			}
+		*x++ = 0xffffffffL;	/* a zero word wraps to all ones; keep borrowing */
+		}
+		while(x < xe);
+#else
+	do {
+		y = *x - borrow;
+		borrow = (y & 0x10000) >> 16;	/* bit 16 set means the 16-bit word underflowed */
+		*x++ = y & 0xffff;
+		} while(borrow && x < xe);
+#endif
+	}
+
+ static int	/* all_on: 1 when bits 0..n-1 of b are all set, else 0 */
+#ifdef KR_headers
+all_on(b, n) Bigint *b; int n;
+#else
+all_on(Bigint *b, int n)
+#endif
+{
+	ULong *w, *wend;
+	int partial;
+	wend = b->x + (n >> kshift);	/* the n >> kshift whole words come first */
+	for(w = b->x; w < wend; w++)
+		if ((*w & ALL_ON) != ALL_ON)
+			return 0;
+	partial = n & kmask;	/* bits still to check in the following word */
+	if (!partial)
+		return 1;
+	return ((*w | (ALL_ON << partial)) & ALL_ON) == ALL_ON;
+	}
+
+ Bigint *	/* set_ones: make b's value n one-bits; returns b or a replacement from Balloc */
+#ifdef KR_headers
+set_ones(b, n) Bigint *b; int n;
+#else
+set_ones(Bigint *b, int n)
+#endif
+{
+	int nwords, tailbits;
+	ULong *w, *wend;
+
+	nwords = (n + ((1 << kshift) - 1)) >> kshift;
+	/* when b->k is below that word count, free b and take a new Bigint from Balloc */
+	if (b->k < nwords) {
+		Bfree(b);
+		b = Balloc(nwords);
+		}
+	tailbits = n & kmask;
+	nwords = (n >> kshift) + (tailbits ? 1 : 0);
+	b->wds = nwords;
+	w = b->x;
+	for(wend = w + nwords; w < wend; w++)
+		*w = ALL_ON;
+	/* a partial top word keeps only its low tailbits bits */
+	if (tailbits)
+		w[-1] >>= ULbits - tailbits;
+	return b;
+	}
+
+ static int	/* rvOK: try to finish from the double in d; returns 1 with *exp, bits and *irv filled in, or 0 when the caller must continue */
+rvOK
+#ifdef KR_headers
+ (d, fpi, exp, bits, exact, rd, irv)
+ U *d; FPI *fpi; Long *exp; ULong *bits; int exact, rd, *irv;
+#else
+ (U *d, FPI *fpi, Long *exp, ULong *bits, int exact, int rd, int *irv)
+#endif
+{
+	Bigint *b;
+	ULong carry, inex, lostbits;
+	int bdif, e, j, k, k1, nb, rv;
+	/* exact: nonzero when the caller knows d carries no rounding error; rd selects the rounding case in the switch below */
+	carry = rv = 0;
+	b = d2b(dval(d), &e, &bdif);
+	bdif -= nb = fpi->nbits;	/* bdif is now the bit count of b minus the target precision nb */
+	e += bdif;
+	if (bdif <= 0) {
+		if (exact)
+			goto trunc;
+		goto ret;
+		}
+	if (P == nb) {
+		if (
+#ifndef IMPRECISE_INEXACT
+			exact &&
+#endif
+			fpi->rounding ==
+#ifdef RND_PRODQUOT
+					FPI_Round_near
+#else
+					Flt_Rounds
+#endif
+			) goto trunc;
+		goto ret;
+		}
+	switch(rd) {
+	  case 1: /* round down (toward -Infinity) */
+		goto trunc;
+	  case 2: /* round up (toward +Infinity) */
+		break;
+	  default: /* round near */
+		k = bdif - 1;	/* index of the highest bit that will be dropped */
+		if (k < 0)
+			goto trunc;
+		if (!k) {
+			if (!exact)
+				goto ret;
+			if (b->x[0] & 2)
+				break;
+			goto trunc;
+			}
+		if (b->x[k>>kshift] & ((ULong)1 << (k & kmask)))
+			break;
+		goto trunc;
+	  }
+	/* "break" cases: round up 1 bit, then truncate; bdif > 0 */
+	carry = 1;
+ trunc:
+	inex = lostbits = 0;
+	if (bdif > 0) {
+		if ( (lostbits = any_on(b, bdif)) !=0)
+			inex = STRTOG_Inexlo;
+		rshift(b, bdif);
+		if (carry) {
+			inex = STRTOG_Inexhi;
+			b = increment(b);
+			if ( (j = nb & kmask) !=0)
+				j = ULbits - j;
+			if (hi0bits(b->x[b->wds - 1]) != j) {	/* the increment made b one bit longer: renormalize */
+				if (!lostbits)
+					lostbits = b->x[0] & 1;
+				rshift(b, 1);
+				e++;
+				}
+			}
+		}
+	else if (bdif < 0)
+		b = lshift(b, -bdif);
+	if (e < fpi->emin) {	/* below the minimum exponent: flush to zero or denormalize */
+		k = fpi->emin - e;
+		e = fpi->emin;
+		if (k > nb || fpi->sudden_underflow) {
+			b->wds = inex = 0;
+			*irv = STRTOG_Underflow | STRTOG_Inexlo;
+			}
+		else {
+			k1 = k - 1;
+			if (k1 > 0 && !lostbits)
+				lostbits = any_on(b, k1);
+			if (!lostbits && !exact)
+				goto ret;
+			lostbits |=	/* (ULong)1 keeps the shift unsigned, as in the round-near test above; a plain int 1 is undefined at the top bit */
+			  carry = b->x[k1>>kshift] & ((ULong)1 << (k1 & kmask));
+			rshift(b, k);
+			*irv = STRTOG_Denormal;
+			if (carry) {
+				b = increment(b);
+				inex = STRTOG_Inexhi | STRTOG_Underflow;
+				}
+			else if (lostbits)
+				inex = STRTOG_Inexlo | STRTOG_Underflow;
+			}
+		}
+	else if (e > fpi->emax) {	/* above the maximum exponent: report infinity with overflow */
+		e = fpi->emax + 1;
+		*irv = STRTOG_Infinite | STRTOG_Overflow | STRTOG_Inexhi;
+#ifndef NO_ERRNO
+		errno = ERANGE;
+#endif
+		b->wds = inex = 0;
+		}
+	*exp = e;
+	copybits(bits, nb, b);
+	*irv |= inex;
+	rv = 1;
+ ret:
+	Bfree(b);	/* b is released on both the success and the give-up path */
+	return rv;
+	}
+
+ static int	/* mantbits: P minus lo0bits() of d's significand words (lo0bits presumably counts low-order zero bits -- confirm) */
+#ifdef KR_headers
+mantbits(d) U *d;
+#else
+mantbits(U *d)
+#endif
+{
+	ULong L;
+#ifdef VAX
+	L = word1(d) << 16 | word1(d) >> 16;
+	if (L)
+#else
+	if ( (L = word1(d)) !=0)	/* word1 nonzero: the answer comes from word1 alone */
+#endif
+		return P - lo0bits(&L);
+#ifdef VAX
+	L = word0(d) << 16 | word0(d) >> 16 | Exp_msk11;
+#else
+	L = word0(d) | Exp_msk1;	/* NOTE(review): Exp_msk1 presumably stands in for the implicit leading bit so L is never 0 -- confirm in gdtoaimp.h */
+#endif
+	return P - 32 - lo0bits(&L);	/* word1 was all zero, so 32 more low bits are unused */
+	}
+
+ int	/* strtodg: parse the number at s00 and round it to the binary format described by fpi */
+strtodg
+#ifdef KR_headers
+	(s00, se, fpi, exp, bits)
+	CONST char *s00; char **se; FPI *fpi; Long *exp; ULong *bits;
+#else
+	(CONST char *s00, char **se, FPI *fpi, Long *exp, ULong *bits)
+#endif
+{
+	int abe, abits, asub;
+	int bb0, bb2, bb5, bbe, bd2, bd5, bbbits, bs2, c, decpt, denorm;
+	int dsign, e, e1, e2, emin, esign, finished, i, inex, irv;
+	int j, k, nbits, nd, nd0, nf, nz, nz0, rd, rvbits, rve, rve1, sign;
+	int sudden_underflow;
+	CONST char *s, *s0, *s1;
+	double adj0, tol;
+	Long L;
+	U adj, rv;
+	ULong *b, *be, y, z;
+	Bigint *ab, *bb, *bb1, *bd, *bd0, *bs, *delta, *rvb, *rvb0;
+#ifdef USE_LOCALE /*{{*/
+#ifdef NO_LOCALE_CACHE
+	char *decimalpoint = localeconv()->decimal_point;
+	int dplen = strlen(decimalpoint);
+#else
+	char *decimalpoint;
+	static char *decimalpoint_cache;	/* NOTE(review): function-static cache, filled below with no locking visible here */
+	static int dplen;
+	if (!(s0 = decimalpoint_cache)) {
+		s0 = localeconv()->decimal_point;
+		if ((decimalpoint_cache = (char*)MALLOC(strlen(s0) + 1))) {
+			strcpy(decimalpoint_cache, s0);
+			s0 = decimalpoint_cache;
+			}
+		dplen = strlen(s0);
+		}
+	decimalpoint = (char*)s0;
+#endif /*NO_LOCALE_CACHE*/
+#else  /*USE_LOCALE}{*/
+#define dplen 1
+#endif /*USE_LOCALE}}*/
+	/* Returns STRTOG_* flags; paths that produce a value store its exponent in *exp and its significand in bits[]; *se (if se is nonzero) gets the end of the parse */
+	irv = STRTOG_Zero;
+	denorm = sign = nz0 = nz = 0;
+	dval(&rv) = 0.;
+	rvb = 0;
+	nbits = fpi->nbits;
+	for(s = s00;;s++) switch(*s) {	/* skip leading white space and take an optional sign */
+		case '-':
+			sign = 1;
+			/* no break */
+		case '+':
+			if (*++s)
+				goto break2;
+			/* no break */
+		case 0:
+			sign = 0;
+			irv = STRTOG_NoNumber;
+			s = s00;
+			goto ret;
+		case '\t':
+		case '\n':
+		case '\v':
+		case '\f':
+		case '\r':
+		case ' ':
+			continue;
+		default:
+			goto break2;
+		}
+ break2:
+	if (*s == '0') {
+#ifndef NO_HEX_FP
+		switch(s[1]) {
+		  case 'x':
+		  case 'X':
+			irv = gethex(&s, fpi, exp, &rvb, sign);
+			if (irv == STRTOG_NoNumber) {
+				s = s00;
+				sign = 0;
+				}
+			goto ret;
+		  }
+#endif
+		nz0 = 1;
+		while(*++s == '0') ;
+		if (!*s)
+			goto ret;
+		}
+	sudden_underflow = fpi->sudden_underflow;
+	s0 = s;
+	y = z = 0;
+	for(decpt = nd = nf = 0; (c = *s) >= '0' && c <= '9'; nd++, s++)
+		if (nd < 9)
+			y = 10*y + c - '0';
+		else if (nd < 16)
+			z = 10*z + c - '0';
+	nd0 = nd;
+#ifdef USE_LOCALE
+	if (c == *decimalpoint) {
+		for(i = 1; decimalpoint[i]; ++i)
+			if (s[i] != decimalpoint[i])
+				goto dig_done;
+		s += i;
+		c = *s;
+#else
+	if (c == '.') {
+		c = *++s;
+#endif
+		decpt = 1;
+		if (!nd) {
+			for(; c == '0'; c = *++s)
+				nz++;
+			if (c > '0' && c <= '9') {
+				s0 = s;
+				nf += nz;
+				nz = 0;
+				goto have_dig;
+				}
+			goto dig_done;
+			}
+		for(; c >= '0' && c <= '9'; c = *++s) {
+ have_dig:
+			nz++;
+			if (c -= '0') {
+				nf += nz;
+				for(i = 1; i < nz; i++)
+					if (nd++ < 9)
+						y *= 10;
+					else if (nd <= DBL_DIG + 1)
+						z *= 10;
+				if (nd++ < 9)
+					y = 10*y + c;
+				else if (nd <= DBL_DIG + 1)
+					z = 10*z + c;
+				nz = 0;
+				}
+			}
+		}/*}*/
+ dig_done:
+	e = 0;
+	if (c == 'e' || c == 'E') {
+		if (!nd && !nz && !nz0) {
+			irv = STRTOG_NoNumber;
+			s = s00;
+			goto ret;
+			}
+		s00 = s;
+		esign = 0;
+		switch(c = *++s) {
+			case '-':
+				esign = 1;
+			case '+':
+				c = *++s;
+			}
+		if (c >= '0' && c <= '9') {
+			while(c == '0')
+				c = *++s;
+			if (c > '0' && c <= '9') {
+				L = c - '0';
+				s1 = s;
+				while((c = *++s) >= '0' && c <= '9')
+					L = 10*L + c - '0';	/* NOTE(review): L is accumulated before the length check below, so a very long digit run can overflow it */
+				if (s - s1 > 8 || L > 19999)
+					/* Avoid confusion from exponents
+					 * so large that e might overflow.
+					 */
+					e = 19999; /* safe for 16 bit ints */
+				else
+					e = (int)L;
+				if (esign)
+					e = -e;
+				}
+			else
+				e = 0;
+			}
+		else
+			s = s00;
+		}
+	if (!nd) {
+		if (!nz && !nz0) {
+#ifdef INFNAN_CHECK
+			/* Check for Nan and Infinity */
+			if (!decpt)
+			 switch(c) {
+			  case 'i':
+			  case 'I':
+				if (match(&s,"nf")) {
+					--s;
+					if (!match(&s,"inity"))
+						++s;
+					irv = STRTOG_Infinite;
+					goto infnanexp;
+					}
+				break;
+			  case 'n':
+			  case 'N':
+				if (match(&s, "an")) {
+					irv = STRTOG_NaN;
+					*exp = fpi->emax + 1;
+#ifndef No_Hex_NaN
+					if (*s == '(') /*)*/
+						irv = hexnan(&s, fpi, bits);
+#endif
+					goto infnanexp;
+					}
+			  }
+#endif /* INFNAN_CHECK */
+			irv = STRTOG_NoNumber;
+			s = s00;
+			}
+		goto ret;
+		}
+
+	irv = STRTOG_Normal;
+	e1 = e -= nf;
+	rd = 0;
+	switch(fpi->rounding & 3) {
+	  case FPI_Round_up:
+		rd = 2 - sign;
+		break;
+	  case FPI_Round_zero:
+		rd = 1;
+		break;
+	  case FPI_Round_down:
+		rd = 1 + sign;
+	  }
+
+	/* Now we have nd0 digits, starting at s0, followed by a
+	 * decimal point, followed by nd-nd0 digits.  The number we're
+	 * after is the integer represented by those digits times
+	 * 10**e */
+
+	if (!nd0)
+		nd0 = nd;
+	k = nd < DBL_DIG + 1 ? nd : DBL_DIG + 1;
+	dval(&rv) = y;
+	if (k > 9)
+		dval(&rv) = tens[k - 9] * dval(&rv) + z;
+	bd0 = 0;
+	if (nbits <= P && nd <= DBL_DIG) {	/* few enough digits: try to finish in native double arithmetic via rvOK */
+		if (!e) {
+			if (rvOK(&rv, fpi, exp, bits, 1, rd, &irv))
+				goto ret;
+			}
+		else if (e > 0) {
+			if (e <= Ten_pmax) {
+#ifdef VAX
+				goto vax_ovfl_check;
+#else
+				i = fivesbits[e] + mantbits(&rv) <= P;
+				/* rv = */ rounded_product(dval(&rv), tens[e]);
+				if (rvOK(&rv, fpi, exp, bits, i, rd, &irv))
+					goto ret;
+				e1 -= e;
+				goto rv_notOK;
+#endif
+				}
+			i = DBL_DIG - nd;
+			if (e <= Ten_pmax + i) {
+				/* A fancier test would sometimes let us do
+				 * this for larger i values.
+				 */
+				e2 = e - i;
+				e1 -= i;
+				dval(&rv) *= tens[i];
+#ifdef VAX
+				/* VAX exponent range is so narrow we must
+				 * worry about overflow here...
+				 */
+ vax_ovfl_check:
+				dval(&adj) = dval(&rv);
+				word0(&adj) -= P*Exp_msk1;
+				/* adj = */ rounded_product(dval(&adj), tens[e2]);
+				if ((word0(&adj) & Exp_mask)
+				 > Exp_msk1*(DBL_MAX_EXP+Bias-1-P))
+					goto rv_notOK;
+				word0(&adj) += P*Exp_msk1;
+				dval(&rv) = dval(&adj);
+#else
+				/* rv = */ rounded_product(dval(&rv), tens[e2]);
+#endif
+				if (rvOK(&rv, fpi, exp, bits, 0, rd, &irv))
+					goto ret;
+				e1 -= e2;
+				}
+			}
+#ifndef Inaccurate_Divide
+		else if (e >= -Ten_pmax) {
+			/* rv = */ rounded_quotient(dval(&rv), tens[-e]);
+			if (rvOK(&rv, fpi, exp, bits, 0, rd, &irv))
+				goto ret;
+			e1 -= e;
+			}
+#endif
+		}
+ rv_notOK:
+	e1 += nd - k;
+
+	/* Get starting approximation = rv * 10**e1 */
+
+	e2 = 0;
+	if (e1 > 0) {
+		if ( (i = e1 & 15) !=0)
+			dval(&rv) *= tens[i];
+		if (e1 &= ~15) {
+			e1 >>= 4;
+			while(e1 >= (1 << (n_bigtens-1))) {
+				e2 += ((word0(&rv) & Exp_mask)
+					>> Exp_shift1) - Bias;
+				word0(&rv) &= ~Exp_mask;
+				word0(&rv) |= Bias << Exp_shift1;
+				dval(&rv) *= bigtens[n_bigtens-1];
+				e1 -= 1 << (n_bigtens-1);
+				}
+			e2 += ((word0(&rv) & Exp_mask) >> Exp_shift1) - Bias;
+			word0(&rv) &= ~Exp_mask;
+			word0(&rv) |= Bias << Exp_shift1;
+			for(j = 0; e1 > 0; j++, e1 >>= 1)
+				if (e1 & 1)
+					dval(&rv) *= bigtens[j];
+			}
+		}
+	else if (e1 < 0) {
+		e1 = -e1;
+		if ( (i = e1 & 15) !=0)
+			dval(&rv) /= tens[i];
+		if (e1 &= ~15) {
+			e1 >>= 4;
+			while(e1 >= (1 << (n_bigtens-1))) {
+				e2 += ((word0(&rv) & Exp_mask)
+					>> Exp_shift1) - Bias;
+				word0(&rv) &= ~Exp_mask;
+				word0(&rv) |= Bias << Exp_shift1;
+				dval(&rv) *= tinytens[n_bigtens-1];
+				e1 -= 1 << (n_bigtens-1);
+				}
+			e2 += ((word0(&rv) & Exp_mask) >> Exp_shift1) - Bias;
+			word0(&rv) &= ~Exp_mask;
+			word0(&rv) |= Bias << Exp_shift1;
+			for(j = 0; e1 > 0; j++, e1 >>= 1)
+				if (e1 & 1)
+					dval(&rv) *= tinytens[j];
+			}
+		}
+#ifdef IBM
+	/* e2 is a correction to the (base 2) exponent of the return
+	 * value, reflecting adjustments above to avoid overflow in the
+	 * native arithmetic.  For native IBM (base 16) arithmetic, we
+	 * must multiply e2 by 4 to change from base 16 to 2.
+	 */
+	e2 <<= 2;
+#endif
+	rvb = d2b(dval(&rv), &rve, &rvbits);	/* rv = rvb * 2^rve */
+	rve += e2;
+	if ((j = rvbits - nbits) > 0) {
+		rshift(rvb, j);
+		rvbits = nbits;
+		rve += j;
+		}
+	bb0 = 0;	/* trailing zero bits in rvb */
+	e2 = rve + rvbits - nbits;
+	if (e2 > fpi->emax + 1)
+		goto huge;
+	rve1 = rve + rvbits - nbits;
+	if (e2 < (emin = fpi->emin)) {
+		denorm = 1;
+		j = rve - emin;
+		if (j > 0) {
+			rvb = lshift(rvb, j);
+			rvbits += j;
+			}
+		else if (j < 0) {
+			rvbits += j;
+			if (rvbits <= 0) {
+				if (rvbits < -1) {
+ ufl:
+					rvb->wds = 0;
+					rvb->x[0] = 0;
+					*exp = emin;
+					irv = STRTOG_Underflow | STRTOG_Inexlo;
+					goto ret;
+					}
+				rvb->x[0] = rvb->wds = rvbits = 1;
+				}
+			else
+				rshift(rvb, -j);
+			}
+		rve = rve1 = emin;
+		if (sudden_underflow && e2 + 1 < emin)
+			goto ufl;
+		}
+
+	/* Now the hard part -- adjusting rv to the correct value.*/
+
+	/* Put digits into bd: true value = bd * 10^e */
+
+	bd0 = s2b(s0, nd0, nd, y, dplen);
+
+	for(;;) {	/* correction loop: compare the approximation (bb) with the decimal digits (bd) and adjust rvb */
+		bd = Balloc(bd0->k);
+		Bcopy(bd, bd0);
+		bb = Balloc(rvb->k);
+		Bcopy(bb, rvb);
+		bbbits = rvbits - bb0;
+		bbe = rve + bb0;
+		bs = i2b(1);
+
+		if (e >= 0) {
+			bb2 = bb5 = 0;
+			bd2 = bd5 = e;
+			}
+		else {
+			bb2 = bb5 = -e;
+			bd2 = bd5 = 0;
+			}
+		if (bbe >= 0)
+			bb2 += bbe;
+		else
+			bd2 -= bbe;
+		bs2 = bb2;
+		j = nbits + 1 - bbbits;
+		i = bbe + bbbits - nbits;
+		if (i < emin)	/* denormal */
+			j += i - emin;
+		bb2 += j;
+		bd2 += j;
+		i = bb2 < bd2 ? bb2 : bd2;
+		if (i > bs2)
+			i = bs2;
+		if (i > 0) {
+			bb2 -= i;
+			bd2 -= i;
+			bs2 -= i;
+			}
+		if (bb5 > 0) {
+			bs = pow5mult(bs, bb5);
+			bb1 = mult(bs, bb);
+			Bfree(bb);
+			bb = bb1;
+			}
+		bb2 -= bb0;
+		if (bb2 > 0)
+			bb = lshift(bb, bb2);
+		else if (bb2 < 0)
+			rshift(bb, -bb2);
+		if (bd5 > 0)
+			bd = pow5mult(bd, bd5);
+		if (bd2 > 0)
+			bd = lshift(bd, bd2);
+		if (bs2 > 0)
+			bs = lshift(bs, bs2);
+		asub = 1;
+		inex = STRTOG_Inexhi;
+		delta = diff(bb, bd);
+		if (delta->wds <= 1 && !delta->x[0])
+			break;
+		dsign = delta->sign;
+		delta->sign = finished = 0;
+		L = 0;
+		i = cmp(delta, bs);
+		if (rd && i <= 0) {
+			irv = STRTOG_Normal;
+			if ( (finished = dsign ^ (rd&1)) !=0) {
+				if (dsign != 0) {
+					irv |= STRTOG_Inexhi;
+					goto adj1;
+					}
+				irv |= STRTOG_Inexlo;
+				if (rve1 == emin)
+					goto adj1;
+				for(i = 0, j = nbits; j >= ULbits;
+						i++, j -= ULbits) {
+					if (rvb->x[i] & ALL_ON)
+						goto adj1;
+					}
+				if (j > 1 && lo0bits(rvb->x + i) < j - 1)
+					goto adj1;
+				rve = rve1 - 1;
+				rvb = set_ones(rvb, rvbits = nbits);
+				break;
+				}
+			irv |= dsign ? STRTOG_Inexlo : STRTOG_Inexhi;
+			break;
+			}
+		if (i < 0) {
+			/* Error is less than half an ulp -- check for
+			 * special case of mantissa a power of two.
+			 */
+			irv = dsign
+				? STRTOG_Normal | STRTOG_Inexlo
+				: STRTOG_Normal | STRTOG_Inexhi;
+			if (dsign || bbbits > 1 || denorm || rve1 == emin)
+				break;
+			delta = lshift(delta,1);
+			if (cmp(delta, bs) > 0) {
+				irv = STRTOG_Normal | STRTOG_Inexlo;
+				goto drop_down;
+				}
+			break;
+			}
+		if (i == 0) {
+			/* exactly half-way between */
+			if (dsign) {
+				if (denorm && all_on(rvb, rvbits)) {
+					/*boundary case -- increment exponent*/
+					rvb->wds = 1;
+					rvb->x[0] = 1;
+					rve = emin + nbits - (rvbits = 1);
+					irv = STRTOG_Normal | STRTOG_Inexhi;
+					denorm = 0;
+					break;
+					}
+				irv = STRTOG_Normal | STRTOG_Inexlo;
+				}
+			else if (bbbits == 1) {
+				irv = STRTOG_Normal;
+ drop_down:
+				/* boundary case -- decrement exponent */
+				if (rve1 == emin) {
+					irv = STRTOG_Normal | STRTOG_Inexhi;
+					if (rvb->wds == 1 && rvb->x[0] == 1)
+						sudden_underflow = 1;
+					break;
+					}
+				rve -= nbits;
+				rvb = set_ones(rvb, rvbits = nbits);
+				break;
+				}
+			else
+				irv = STRTOG_Normal | STRTOG_Inexhi;
+			if ((bbbits < nbits && !denorm) || !(rvb->x[0] & 1))
+				break;
+			if (dsign) {
+				rvb = increment(rvb);
+				j = kmask & (ULbits - (rvbits & kmask));
+				if (hi0bits(rvb->x[rvb->wds - 1]) != j)
+					rvbits++;
+				irv = STRTOG_Normal | STRTOG_Inexhi;
+				}
+			else {
+				if (bbbits == 1)
+					goto undfl;
+				decrement(rvb);
+				irv = STRTOG_Normal | STRTOG_Inexlo;
+				}
+			break;
+			}
+		if ((dval(&adj) = ratio(delta, bs)) <= 2.) {
+ adj1:
+			inex = STRTOG_Inexlo;
+			if (dsign) {
+				asub = 0;
+				inex = STRTOG_Inexhi;
+				}
+			else if (denorm && bbbits <= 1) {
+ undfl:
+				rvb->wds = 0;
+				rve = emin;
+				irv = STRTOG_Underflow | STRTOG_Inexlo;
+				break;
+				}
+			adj0 = dval(&adj) = 1.;
+			}
+		else {
+			adj0 = dval(&adj) *= 0.5;
+			if (dsign) {
+				asub = 0;
+				inex = STRTOG_Inexlo;
+				}
+			if (dval(&adj) < 2147483647.) {
+				L = (Long)adj0;
+				adj0 -= L;
+				switch(rd) {
+				  case 0:
+					if (adj0 >= .5)
+						goto inc_L;
+					break;
+				  case 1:
+					if (asub && adj0 > 0.)
+						goto inc_L;
+					break;
+				  case 2:
+					if (!asub && adj0 > 0.) {
+ inc_L:
+						L++;
+						inex = STRTOG_Inexact - inex;
+						}
+				  }
+				dval(&adj) = L;
+				}
+			}
+		y = rve + rvbits;
+
+		/* adj *= ulp(dval(&rv)); */
+		/* if (asub) rv -= adj; else rv += adj; */
+
+		if (!denorm && rvbits < nbits) {
+			rvb = lshift(rvb, j = nbits - rvbits);
+			rve -= j;
+			rvbits = nbits;
+			}
+		ab = d2b(dval(&adj), &abe, &abits);
+		if (abe < 0)
+			rshift(ab, -abe);
+		else if (abe > 0)
+			ab = lshift(ab, abe);
+		rvb0 = rvb;
+		if (asub) {
+			/* rv -= adj; */
+			j = hi0bits(rvb->x[rvb->wds-1]);
+			rvb = diff(rvb, ab);
+			k = rvb0->wds - 1;
+			if (denorm)
+				/* do nothing */;
+			else if (rvb->wds <= k
+				|| hi0bits( rvb->x[k]) >
+				   hi0bits(rvb0->x[k])) {
+				/* unlikely; can only have lost 1 high bit */
+				if (rve1 == emin) {
+					--rvbits;
+					denorm = 1;
+					}
+				else {
+					rvb = lshift(rvb, 1);
+					--rve;
+					--rve1;
+					L = finished = 0;
+					}
+				}
+			}
+		else {
+			rvb = sum(rvb, ab);
+			k = rvb->wds - 1;
+			if (k >= rvb0->wds
+			 || hi0bits(rvb->x[k]) < hi0bits(rvb0->x[k])) {
+				if (denorm) {
+					if (++rvbits == nbits)
+						denorm = 0;
+					}
+				else {
+					rshift(rvb, 1);
+					rve++;
+					rve1++;
+					L = 0;
+					}
+				}
+			}
+		Bfree(ab);
+		Bfree(rvb0);
+		if (finished)
+			break;
+
+		z = rve + rvbits;
+		if (y == z && L) {
+			/* Can we stop now? */
+			tol = dval(&adj) * 5e-16; /* > max rel error */
+			dval(&adj) = adj0 - .5;
+			if (dval(&adj) < -tol) {
+				if (adj0 > tol) {
+					irv |= inex;
+					break;
+					}
+				}
+			else if (dval(&adj) > tol && adj0 < 1. - tol) {
+				irv |= inex;
+				break;
+				}
+			}
+		bb0 = denorm ? 0 : trailz(rvb);
+		Bfree(bb);
+		Bfree(bd);
+		Bfree(bs);
+		Bfree(delta);
+		}
+	if (!denorm && (j = nbits - rvbits)) {
+		if (j > 0)
+			rvb = lshift(rvb, j);
+		else
+			rshift(rvb, -j);
+		rve -= j;
+		}
+	*exp = rve;
+	Bfree(bb);
+	Bfree(bd);
+	Bfree(bs);
+	Bfree(bd0);
+	Bfree(delta);
+	if (rve > fpi->emax) {
+		switch(fpi->rounding & 3) {
+		  case FPI_Round_near:
+			goto huge;
+		  case FPI_Round_up:
+			if (!sign)
+				goto huge;
+			break;
+		  case FPI_Round_down:
+			if (sign)
+				goto huge;
+		  }
+		/* Round to largest representable magnitude */
+		Bfree(rvb);
+		rvb = 0;
+		irv = STRTOG_Normal | STRTOG_Inexlo;
+		*exp = fpi->emax;
+		b = bits;
+		be = b + ((fpi->nbits + 31) >> 5);
+		while(b < be)
+			*b++ = -1;
+		if ((j = fpi->nbits & 0x1f))
+			*--be >>= (32 - j);
+		goto ret;
+ huge:
+		rvb->wds = 0;
+		irv = STRTOG_Infinite | STRTOG_Overflow | STRTOG_Inexhi;
+#ifndef NO_ERRNO
+		errno = ERANGE;
+#endif
+ infnanexp:
+		*exp = fpi->emax + 1;
+		}
+ ret:
+	if (denorm) {
+		if (sudden_underflow) {
+			rvb->wds = 0;
+			irv = STRTOG_Underflow | STRTOG_Inexlo;
+#ifndef NO_ERRNO
+			errno = ERANGE;
+#endif
+			}
+		else  {
+			irv = (irv & ~STRTOG_Retmask) |
+				(rvb->wds > 0 ? STRTOG_Denormal : STRTOG_Zero);
+			if (irv & STRTOG_Inexact) {
+				irv |= STRTOG_Underflow;
+#ifndef NO_ERRNO
+				errno = ERANGE;
+#endif
+				}
+			}
+		}
+	if (se)
+		*se = (char *)s;
+	if (sign)
+		irv |= STRTOG_Neg;
+	if (rvb) {
+		copybits(bits, nbits, rvb);
+		Bfree(rvb);
+		}
+	return irv;
+	}
diff --git a/libraries/gdtoa/strtodnrp.c b/libraries/gdtoa/strtodnrp.c
new file mode 100644
index 000000000..19a769f0b
--- /dev/null
+++ b/libraries/gdtoa/strtodnrp.c
@@ -0,0 +1,87 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 2004 by David M. Gay.
+All Rights Reserved
+Based on material in the rest of /netlib/fp/gdota.tar.gz,
+which is copyright (C) 1998, 2000 by Lucent Technologies.
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* This is a variant of strtod that works on Intel ia32 systems */
+/* with the default extended-precision arithmetic -- it does not */
+/* require setting the precision control to 53 bits.  */
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ double	/* strtod: run strtodg for the 53-bit format and pack its result into a double */
+#ifdef KR_headers
+strtod(s, sp) CONST char *s; char **sp;
+#else
+strtod(CONST char *s, char **sp)
+#endif
+{
+	static FPI fpi = { 53, 1-1023-53+1, 2046-1023-53+1, 1, SI };
+	union { ULong L[2]; double d; } res;
+	ULong mant[2];
+	Long bexp;
+	int rc;
+
+	rc = strtodg(s, sp, &fpi, &bexp, mant);
+	switch(rc & STRTOG_Retmask) {
+	  case STRTOG_NaN:
+		res.L[1] = d_QNAN1;
+		res.L[0] = d_QNAN0;
+		break;
+
+	  case STRTOG_NaNbits:
+		res.L[_1] = mant[0];
+		res.L[_0] = 0x7ff00000 | mant[1];
+		break;
+
+	  case STRTOG_Infinite:
+		res.L[_1] = 0;
+		res.L[_0] = 0x7ff00000;
+		break;
+
+	  case STRTOG_Denormal:
+		res.L[_0] = mant[1];
+		res.L[_1] = mant[0];
+		break;
+
+	  case STRTOG_Normal:
+		/* drop bit 20 of the high word and put the biased exponent above it */
+		res.L[_0] = (mant[1] & ~0x100000) | ((bexp + 0x3ff + 52) << 20);
+		res.L[_1] = mant[0];
+		break;
+
+	  case STRTOG_NoNumber:
+	  case STRTOG_Zero:
+		res.L[0] = res.L[1] = 0;
+	  }
+	if (rc & STRTOG_Neg)
+		res.L[_0] |= 0x80000000L;
+	return res.d;
+	}
diff --git a/libraries/gdtoa/strtof.c b/libraries/gdtoa/strtof.c
new file mode 100644
index 000000000..a8beb3520
--- /dev/null
+++ b/libraries/gdtoa/strtof.c
@@ -0,0 +1,78 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ float	/* strtof: run strtodg for a 24-bit significand and pack the result into a float */
+#ifdef KR_headers
+strtof(s, sp) CONST char *s; char **sp;
+#else
+strtof(CONST char *s, char **sp)
+#endif
+{
+	static FPI fpi0 = { 24, 1-127-24+1,  254-127-24+1, 1, SI };	/* presumably { nbits, emin, emax, rounding, sudden_underflow } -- confirm against the FPI declaration */
+	ULong bits[1];
+	Long exp;
+	int k;
+	union { ULong L[1]; float f; } u;
+#ifdef Honor_FLT_ROUNDS
+#include "gdtoa_fltrnds.h"
+#else
+#define fpi &fpi0
+#endif
+	/* fpi is either supplied by the header included above or is simply &fpi0 */
+	k = strtodg(s, sp, fpi, &exp, bits);
+	switch(k & STRTOG_Retmask) {
+	  case STRTOG_NoNumber:
+	  case STRTOG_Zero:
+		u.L[0] = 0;
+		break;
+
+	  case STRTOG_Normal:
+	  case STRTOG_NaNbits:
+		u.L[0] = (bits[0] & 0x7fffff) | ((exp + 0x7f + 23) << 23);	/* low 23 significand bits plus the biased exponent */
+		break;
+
+	  case STRTOG_Denormal:
+		u.L[0] = bits[0];
+		break;
+
+	  case STRTOG_Infinite:
+		u.L[0] = 0x7f800000;
+		break;
+
+	  case STRTOG_NaN:
+		u.L[0] = f_QNAN;
+	  }
+	if (k & STRTOG_Neg)
+		u.L[0] |= 0x80000000L;	/* set the top bit for a negative result */
+	return u.f;
+	}
diff --git a/libraries/gdtoa/strtopQ.c b/libraries/gdtoa/strtopQ.c
new file mode 100644
index 000000000..2acf7e910
--- /dev/null
+++ b/libraries/gdtoa/strtopQ.c
@@ -0,0 +1,109 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+#undef _0
+#undef _1
+
+/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */
+
+#ifdef IEEE_MC68k
+#define _0 0
+#define _1 1
+#define _2 2
+#define _3 3
+#endif
+#ifdef IEEE_8087
+#define _0 3
+#define _1 2
+#define _2 1
+#define _3 0
+#endif
+
+ extern ULong NanDflt_Q_D2A[4];
+
+
+ int	/* strtopQ: run strtodg for a 113-bit significand and store the result as four ULong words at V; returns strtodg's flags */
+#ifdef KR_headers
+strtopQ(s, sp, V) CONST char *s; char **sp; void *V;
+#else
+strtopQ(CONST char *s, char **sp, void *V)
+#endif
+{
+	static FPI fpi0 = { 113, 1-16383-113+1, 32766 - 16383 - 113 + 1, 1, SI };
+	ULong bits[4];
+	Long exp;
+	int k;
+	ULong *L = (ULong*)V;	/* the caller's buffer is written as ULong[4] */
+#ifdef Honor_FLT_ROUNDS
+#include "gdtoa_fltrnds.h"
+#else
+#define fpi &fpi0
+#endif
+	/* _0 indexes the word that receives the exponent and sign; _3 the word that receives bits[0] */
+	k = strtodg(s, sp, fpi, &exp, bits);
+	switch(k & STRTOG_Retmask) {
+	  case STRTOG_NoNumber:
+	  case STRTOG_Zero:
+		L[0] = L[1] = L[2] = L[3] = 0;
+		break;
+
+	  case STRTOG_Normal:
+	  case STRTOG_NaNbits:
+		L[_3] = bits[0];
+		L[_2] = bits[1];
+		L[_1] = bits[2];
+		L[_0] = (bits[3] & ~0x10000) | ((exp + 0x3fff + 112) << 16);	/* clear bit 16, put the biased exponent above it */
+		break;
+
+	  case STRTOG_Denormal:
+		L[_3] = bits[0];
+		L[_2] = bits[1];
+		L[_1] = bits[2];
+		L[_0] = bits[3];
+		break;
+
+	  case STRTOG_Infinite:
+		L[_0] = 0x7fff0000;
+		L[_1] = L[_2] = L[_3] = 0;
+		break;
+
+	  case STRTOG_NaN:
+		L[_0] = NanDflt_Q_D2A[3];
+		L[_1] = NanDflt_Q_D2A[2];
+		L[_2] = NanDflt_Q_D2A[1];
+		L[_3] = NanDflt_Q_D2A[0];
+	  }
+	if (k & STRTOG_Neg)
+		L[_0] |= 0x80000000L;	/* set the top bit for a negative result */
+	return k;
+	}
diff --git a/libraries/gdtoa/strtopd.c b/libraries/gdtoa/strtopd.c
new file mode 100644
index 000000000..0fb35daea
--- /dev/null
+++ b/libraries/gdtoa/strtopd.c
@@ -0,0 +1,54 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ int	/* strtopd: run strtodg for a 53-bit significand, hand the result to ULtod for *d, and return strtodg's flags */
+#ifdef KR_headers
+strtopd(s, sp, d) char *s; char **sp; double *d;
+#else
+strtopd(CONST char *s, char **sp, double *d)
+#endif
+{
+	static FPI fpi0 = { 53, 1-1023-53+1, 2046-1023-53+1, 1, SI };
+	ULong bits[2];
+	Long exp;
+	int k;
+#ifdef Honor_FLT_ROUNDS
+#include "gdtoa_fltrnds.h"
+#else
+#define fpi &fpi0
+#endif
+	/* fpi is either supplied by the header included above or is simply &fpi0 */
+	k = strtodg(s, sp, fpi, &exp, bits);
+	ULtod((ULong*)d, bits, exp, k);	/* presumably packs bits/exp/k into the double at d -- confirm against ULtod */
+	return k;
+	}
diff --git a/libraries/gdtoa/strtopdd.c b/libraries/gdtoa/strtopdd.c
new file mode 100644
index 000000000..738372d88
--- /dev/null
+++ b/libraries/gdtoa/strtopdd.c
@@ -0,0 +1,183 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ int	/* strtopdd: parse the text at s into the double-double dd[0..1]; returns strtodg()'s result code */
+#ifdef KR_headers
+strtopdd(s, sp, dd) CONST char *s; char **sp; double *dd;
+#else
+strtopdd(CONST char *s, char **sp, double *dd)
+#endif
+{
+#ifdef Sudden_Underflow
+	static FPI fpi0 = { 106, 1-1023, 2046-1023-106+1, 1, 1 };
+#else
+	static FPI fpi0 = { 106, 1-1023-53+1, 2046-1023-106+1, 1, 0 };
+#endif
+	ULong bits[4];	/* significand words from strtodg(); bits[3] is the most significant */
+	Long exp;	/* exponent from strtodg() */
+	int i, j, rv;	/* i, j: shift counts; rv: strtodg() result code */
+	typedef union {
+		double d[2];
+		ULong L[4];
+		} U;	/* the two result doubles viewed as four ULong words */
+	U *u;
+#ifdef Honor_FLT_ROUNDS
+#include "gdtoa_fltrnds.h"
+#else
+#define fpi &fpi0
+#endif
+
+	rv = strtodg(s, sp, fpi, &exp, bits);
+	u = (U*)dd;
+	switch(rv & STRTOG_Retmask) {	/* dispatch on the result class */
+	  case STRTOG_NoNumber:
+	  case STRTOG_Zero:
+		u->d[0] = u->d[1] = 0.;
+		break;
+
+	  case STRTOG_Normal:
+		u->L[_1] = (bits[1] >> 21 | bits[2] << 11) & 0xffffffffL;	/* first double: significand bits above the low 53 */
+		u->L[_0] = (bits[2] >> 21) | ((bits[3] << 11) & 0xfffff)
+			  | ((exp + 0x3ff + 105) << 20);
+		exp += 0x3ff + 52;	/* starting exponent field for the second double */
+		if (bits[1] &= 0x1fffff) {	/* keep the low 21 bits of bits[1]; any set? */
+			i = hi0bits(bits[1]) - 11;
+			if (i >= exp) {	/* shift limited by exp: exponent field becomes 0 */
+				i = exp - 1;
+				exp = 0;
+				}
+			else
+				exp -= i;
+			if (i > 0) {
+				bits[1] = bits[1] << i | bits[0] >> (32-i);
+				bits[0] = bits[0] << i & 0xffffffffL;
+				}
+			}
+		else if (bits[0]) {	/* remaining bits live only in bits[0] */
+			i = hi0bits(bits[0]) + 21;
+			if (i >= exp) {
+				i = exp - 1;
+				exp = 0;
+				}
+			else
+				exp -= i;
+			if (i < 32) {
+				bits[1] = bits[0] >> (32 - i);
+				bits[0] = bits[0] << i & 0xffffffffL;
+				}
+			else {
+				bits[1] = bits[0] << (i - 32);
+				bits[0] = 0;
+				}
+			}
+		else {	/* low 53 bits are all zero: second double is zero */
+			u->L[2] = u->L[3] = 0;
+			break;
+			}
+		u->L[2+_1] = bits[0];
+		u->L[2+_0] = (bits[1] & 0xfffff) | (exp << 20);
+		break;
+
+	  case STRTOG_Denormal:	/* pick a layout by the highest nonzero part of bits[] */
+		if (bits[3])
+			goto nearly_normal;
+		if (bits[2])
+			goto partly_normal;
+		if (bits[1] & 0xffe00000)
+			goto hardly_normal;
+		/* completely denormal */
+		u->L[2] = u->L[3] = 0;
+		u->L[_1] = bits[0];
+		u->L[_0] = bits[1];
+		break;
+
+	  nearly_normal:
+		i = hi0bits(bits[3]) - 11;	/* i >= 12 */
+		j = 32 - i;
+		u->L[_0] = ((bits[3] << i | bits[2] >> j) & 0xfffff)
+			| ((65 - i) << 20);
+		u->L[_1] = (bits[2] << i | bits[1] >> j) & 0xffffffffL;
+		u->L[2+_0] = bits[1] & (((ULong)1L << j) - 1);	/* mask built unsigned, same form as ULtodd in strtordd.c */
+		u->L[2+_1] = bits[0];
+		break;
+
+	  partly_normal:
+		i = hi0bits(bits[2]) - 11;
+		if (i < 0) {
+			j = -i;
+			i += 32;
+			u->L[_0] = (bits[2] >> j & 0xfffff) | (33 + j) << 20;
+			u->L[_1] = ((bits[2] << i) | (bits[1] >> j)) & 0xffffffffL;
+			u->L[2+_0] = bits[1] & (((ULong)1L << j) - 1);
+			u->L[2+_1] = bits[0];
+			break;
+			}
+		if (i == 0) {
+			u->L[_0] = (bits[2] & 0xfffff) | (33 << 20);
+			u->L[_1] = bits[1];
+			u->L[2+_0] = 0;
+			u->L[2+_1] = bits[0];
+			break;
+			}
+		j = 32 - i;
+		u->L[_0] = (((bits[2] << i) | (bits[1] >> j)) & 0xfffff)
+				| ((j + 1) << 20);
+		u->L[_1] = (bits[1] << i | bits[0] >> j) & 0xffffffffL;
+		u->L[2+_0] = 0;
+		u->L[2+_1] = bits[0] & (((ULong)1L << j) - 1);	/* unsigned shift: j is 31 when i == 1, and 1L << 31 overflows a 32-bit long */
+		break;
+
+	  hardly_normal:
+		j = 11 - hi0bits(bits[1]);
+		i = 32 - j;
+		u->L[_0] = (bits[1] >> j & 0xfffff) | ((j + 1) << 20);
+		u->L[_1] = (bits[1] << i | bits[0] >> j) & 0xffffffffL;
+		u->L[2+_0] = 0;
+		u->L[2+_1] = bits[0] & (((ULong)1L << j) - 1);
+		break;
+
+	  case STRTOG_Infinite:
+		u->L[_0] = u->L[2+_0] = 0x7ff00000;
+		u->L[_1] = u->L[2+_1] = 0;
+		break;
+
+	  case STRTOG_NaN:	/* NOTE(review): no STRTOG_NaNbits case here, unlike ULtodd in strtordd.c -- confirm dd cannot be left unwritten */
+		u->L[0] = u->L[2] = d_QNAN0;
+		u->L[1] = u->L[3] = d_QNAN1;
+	  }
+	if (rv & STRTOG_Neg) {	/* negative: set the sign bit of both doubles */
+		u->L[  _0] |= 0x80000000L;
+		u->L[2+_0] |= 0x80000000L;
+		}
+	return rv;
+	}
diff --git a/libraries/gdtoa/strtopf.c b/libraries/gdtoa/strtopf.c
new file mode 100644
index 000000000..23ca5cbe5
--- /dev/null
+++ b/libraries/gdtoa/strtopf.c
@@ -0,0 +1,78 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ int	/* strtopf: parse the text at s into *f; returns strtodg()'s result code k */
+#ifdef KR_headers
+strtopf(s, sp, f) CONST char *s; char **sp; float *f;
+#else
+strtopf(CONST char *s, char **sp, float *f)
+#endif
+{
+	static FPI fpi0 = { 24, 1-127-24+1,  254-127-24+1, 1, SI };	/* format descriptor handed to strtodg(); presumably 24-bit IEEE single -- confirm against FPI in gdtoa.h */
+	ULong bits[1], *L;	/* bits: significand word from strtodg(); L: *f viewed as one ULong */
+	Long exp;	/* exponent from strtodg() */
+	int k;	/* strtodg() result code, returned unchanged */
+#ifdef Honor_FLT_ROUNDS
+#include "gdtoa_fltrnds.h"
+#else
+#define fpi &fpi0
+#endif
+
+	k = strtodg(s, sp, fpi, &exp, bits);
+	L = (ULong*)f;
+	switch(k & STRTOG_Retmask) {	/* dispatch on the result class */
+	  case STRTOG_NoNumber:
+	  case STRTOG_Zero:
+		L[0] = 0;
+		break;
+
+	  case STRTOG_Normal:
+	  case STRTOG_NaNbits:
+		L[0] = (bits[0] & 0x7fffff) | ((exp + 0x7f + 23) << 23);	/* low 23 significand bits plus the exponent field at bit 23 */
+		break;
+
+	  case STRTOG_Denormal:
+		L[0] = bits[0];	/* stored as-is; no exponent field is added */
+		break;
+
+	  case STRTOG_Infinite:
+		L[0] = 0x7f800000;
+		break;
+
+	  case STRTOG_NaN:
+		L[0] = f_QNAN;	/* NaN pattern constant defined outside this file */
+	  }
+	if (k & STRTOG_Neg)
+		L[0] |= 0x80000000L;	/* set the top (sign) bit */
+	return k;
+	}
diff --git a/libraries/gdtoa/strtopx.c b/libraries/gdtoa/strtopx.c
new file mode 100644
index 000000000..32192c572
--- /dev/null
+++ b/libraries/gdtoa/strtopx.c
@@ -0,0 +1,111 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ extern UShort NanDflt_ldus_D2A[5];	/* five-word NaN pattern copied out by the STRTOG_NaN case of strtopx() */
+
+#undef _0	/* discard any earlier _0/_1 before redefining them for five UShort words */
+#undef _1
+
+/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */
+
+#ifdef IEEE_MC68k
+#define _0 0	/* _0 indexes the word that strtopx() fills with the exponent and sign bit */
+#define _1 1
+#define _2 2
+#define _3 3
+#define _4 4	/* _4 indexes the word that receives the low 16 bits of bits[0] */
+#endif
+#ifdef IEEE_8087
+#define _0 4	/* same roles, reversed word order */
+#define _1 3
+#define _2 2
+#define _3 1
+#define _4 0
+#endif
+
+ int	/* strtopx: parse the text at s into five UShort words at V; returns strtodg()'s result code k */
+#ifdef KR_headers
+strtopx(s, sp, V) CONST char *s; char **sp; void *V;
+#else
+strtopx(CONST char *s, char **sp, void *V)
+#endif
+{
+	static FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, SI };	/* format descriptor handed to strtodg(); presumably 64-bit-significand extended -- confirm against FPI in gdtoa.h */
+	ULong bits[2];	/* significand words from strtodg() */
+	Long exp;	/* exponent from strtodg() */
+	int k;	/* strtodg() result code, returned unchanged */
+	UShort *L = (UShort*)V;	/* output viewed as five 16-bit words L[0..4] */
+#ifdef Honor_FLT_ROUNDS
+#include "gdtoa_fltrnds.h"
+#else
+#define fpi &fpi0
+#endif
+
+	k = strtodg(s, sp, fpi, &exp, bits);
+	switch(k & STRTOG_Retmask) {	/* dispatch on the result class */
+	  case STRTOG_NoNumber:
+	  case STRTOG_Zero:
+		L[0] = L[1] = L[2] = L[3] = L[4] = 0;
+		break;
+
+	  case STRTOG_Denormal:
+		L[_0] = 0;	/* exponent word zero; significand words filled at normal_bits */
+		goto normal_bits;
+
+	  case STRTOG_Normal:
+	  case STRTOG_NaNbits:
+		L[_0] = exp + 0x3fff + 63;	/* exponent word */
+ normal_bits:
+		L[_4] = (UShort)bits[0];	/* significand split into four 16-bit words, lowest in L[_4] */
+		L[_3] = (UShort)(bits[0] >> 16);
+		L[_2] = (UShort)bits[1];
+		L[_1] = (UShort)(bits[1] >> 16);
+		break;
+
+	  case STRTOG_Infinite:
+		L[_0] = 0x7fff;
+		L[_1] = 0x8000;	/* only the top bit of the highest significand word is set */
+		L[_2] = L[_3] = L[_4] = 0;
+		break;
+
+	  case STRTOG_NaN:	/* copy the default NaN words, element 0 into L[_4] up to element 4 into L[_0] */
+		L[_4] = NanDflt_ldus_D2A[0];
+		L[_3] = NanDflt_ldus_D2A[1];
+		L[_2] = NanDflt_ldus_D2A[2];
+		L[_1] = NanDflt_ldus_D2A[3];
+		L[_0] = NanDflt_ldus_D2A[4];
+	  }
+	if (k & STRTOG_Neg)
+		L[_0] |= 0x8000;	/* set the top (sign) bit of the exponent word */
+	return k;
+	}
diff --git a/libraries/gdtoa/strtopxL.c b/libraries/gdtoa/strtopxL.c
new file mode 100644
index 000000000..6166c1e62
--- /dev/null
+++ b/libraries/gdtoa/strtopxL.c
@@ -0,0 +1,99 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ extern ULong NanDflt_xL_D2A[3];	/* three-word NaN pattern copied out by the STRTOG_NaN case of strtopxL() */
+
+#undef _0	/* discard any earlier _0/_1 before redefining them for three ULong words */
+#undef _1
+
+/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */
+
+#ifdef IEEE_MC68k
+#define _0 0	/* _0 indexes the word that strtopxL() fills with the exponent (shifted left 16) and sign bit */
+#define _1 1
+#define _2 2	/* _2 indexes the word that receives bits[0] */
+#endif
+#ifdef IEEE_8087
+#define _0 2	/* same roles, reversed word order */
+#define _1 1
+#define _2 0
+#endif
+
+ int	/* strtopxL: parse the text at s into three ULong words at V; returns strtodg()'s result code k */
+#ifdef KR_headers
+strtopxL(s, sp, V) CONST char *s; char **sp; void *V;
+#else
+strtopxL(CONST char *s, char **sp, void *V)
+#endif
+{
+	static FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, SI };	/* same initializer values as fpi0 in strtopx.c */
+	ULong bits[2];	/* significand words from strtodg() */
+	Long exp;	/* exponent from strtodg() */
+	int k;	/* strtodg() result code, returned unchanged */
+	ULong *L = (ULong*)V;	/* output viewed as three ULong words L[0..2] */
+#ifdef Honor_FLT_ROUNDS
+#include "gdtoa_fltrnds.h"
+#else
+#define fpi &fpi0
+#endif
+
+	k = strtodg(s, sp, fpi, &exp, bits);
+	switch(k & STRTOG_Retmask) {	/* dispatch on the result class */
+	  case STRTOG_NoNumber:
+	  case STRTOG_Zero:
+		L[0] = L[1] = L[2] = 0;
+		break;
+
+	  case STRTOG_Normal:
+	  case STRTOG_Denormal:	/* NOTE(review): strtopx.c stores a zero exponent word for denormals; here they share the normal formula -- confirm intended */
+	  case STRTOG_NaNbits:
+		L[_2] = bits[0];
+		L[_1] = bits[1];
+		L[_0] = (exp + 0x3fff + 63) << 16;	/* exponent placed in the upper 16 bits of the word */
+		break;
+
+	  case STRTOG_Infinite:
+		L[_0] = 0x7fff << 16;
+		L[_1] = 0x80000000;	/* only the top significand bit is set */
+		L[_2] = 0;
+		break;
+
+	  case STRTOG_NaN:	/* copy the default NaN words in reverse index order */
+		L[_0] = NanDflt_xL_D2A[2];
+		L[_1] = NanDflt_xL_D2A[1];
+		L[_2] = NanDflt_xL_D2A[0];
+	  }
+	if (k & STRTOG_Neg)
+		L[_0] |= 0x80000000L;	/* set the top (sign) bit */
+	return k;
+	}
diff --git a/libraries/gdtoa/strtorQ.c b/libraries/gdtoa/strtorQ.c
new file mode 100644
index 000000000..f5fd7bba9
--- /dev/null
+++ b/libraries/gdtoa/strtorQ.c
@@ -0,0 +1,119 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+#undef _0	/* discard any earlier _0/_1 before redefining them for four ULong words */
+#undef _1
+
+/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */
+
+#ifdef IEEE_MC68k
+#define _0 0	/* _0 indexes the word that ULtoQ() fills with the exponent field and sign bit */
+#define _1 1
+#define _2 2
+#define _3 3	/* _3 indexes the word that receives bits[0] */
+#endif
+#ifdef IEEE_8087
+#define _0 3	/* same roles, reversed word order */
+#define _1 2
+#define _2 1
+#define _3 0
+#endif
+
+ extern ULong NanDflt_Q_D2A[4];	/* four-word NaN pattern copied out by the STRTOG_NaN case of ULtoQ() */
+
+ void	/* ULtoQ: pack a strtodg() result (bits, exp, class k) into four ULong words at L */
+#ifdef KR_headers
+ULtoQ(L, bits, exp, k) ULong *L; ULong *bits; Long exp; int k;
+#else
+ULtoQ(ULong *L, ULong *bits, Long exp, int k)
+#endif
+{
+	switch(k & STRTOG_Retmask) {	/* dispatch on the result class */
+	  case STRTOG_NoNumber:
+	  case STRTOG_Zero:
+		L[0] = L[1] = L[2] = L[3] = 0;
+		break;
+
+	  case STRTOG_Normal:
+	  case STRTOG_NaNbits:
+		L[_3] = bits[0];
+		L[_2] = bits[1];
+		L[_1] = bits[2];
+		L[_0] = (bits[3] & ~0x10000) | ((exp + 0x3fff + 112) << 16);	/* clear bit 16 of bits[3], then add the exponent field at bit 16 */
+		break;
+
+	  case STRTOG_Denormal:	/* words copied unchanged; no exponent field is added */
+		L[_3] = bits[0];
+		L[_2] = bits[1];
+		L[_1] = bits[2];
+		L[_0] = bits[3];
+		break;
+
+	  case STRTOG_Infinite:
+		L[_0] = 0x7fff0000;
+		L[_1] = L[_2] = L[_3] = 0;
+		break;
+
+	  case STRTOG_NaN:	/* copy the default NaN words in reverse index order */
+		L[_0] = NanDflt_Q_D2A[3];
+		L[_1] = NanDflt_Q_D2A[2];
+		L[_2] = NanDflt_Q_D2A[1];
+		L[_3] = NanDflt_Q_D2A[0];
+	  }
+	if (k & STRTOG_Neg)
+		L[_0] |= 0x80000000L;	/* set the top (sign) bit */
+	}
+
+ int	/* strtorQ: parse the text at s with the given rounding mode into four ULong words at L; returns strtodg()'s result code */
+#ifdef KR_headers
+strtorQ(s, sp, rounding, L) CONST char *s; char **sp; int rounding; void *L;
+#else
+strtorQ(CONST char *s, char **sp, int rounding, void *L)
+#endif
+{
+	static FPI fpi0 = { 113, 1-16383-113+1, 32766-16383-113+1, 1, SI };	/* default format descriptor; presumably 113-bit quad -- confirm against FPI in gdtoa.h */
+	FPI *fpi, fpi1;	/* fpi: descriptor actually used; fpi1: local copy for a non-default rounding mode */
+	ULong bits[4];	/* significand words from strtodg() */
+	Long exp;	/* exponent from strtodg() */
+	int k;	/* strtodg() result code, returned unchanged */
+
+	fpi = &fpi0;
+	if (rounding != FPI_Round_near) {	/* use a private copy so the static fpi0 is never modified */
+		fpi1 = fpi0;
+		fpi1.rounding = rounding;
+		fpi = &fpi1;
+		}
+	k = strtodg(s, sp, fpi, &exp, bits);
+	ULtoQ((ULong*)L, bits, exp, k);	/* pack bits/exp into the output words according to k */
+	return k;
+	}
diff --git a/libraries/gdtoa/strtord.c b/libraries/gdtoa/strtord.c
new file mode 100644
index 000000000..dd0769698
--- /dev/null
+++ b/libraries/gdtoa/strtord.c
@@ -0,0 +1,95 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ extern ULong NanDflt_d_D2A[2];
+
+ void	/* ULtod: pack a strtodg() result (bits, exp, class k) into two ULong words at L */
+#ifdef KR_headers
+ULtod(L, bits, exp, k) ULong *L; ULong *bits; Long exp; int k;
+#else
+ULtod(ULong *L, ULong *bits, Long exp, int k)
+#endif
+{
+	switch(k & STRTOG_Retmask) {	/* dispatch on the result class */
+	  case STRTOG_NoNumber:
+	  case STRTOG_Zero:
+		L[0] = L[1] = 0;
+		break;
+
+	  case STRTOG_Denormal:	/* words copied unchanged; no exponent field is added */
+		L[_1] = bits[0];
+		L[_0] = bits[1];
+		break;
+
+	  case STRTOG_Normal:
+	  case STRTOG_NaNbits:
+		L[_1] = bits[0];
+		L[_0] = (bits[1] & ~0x100000) | ((exp + 0x3ff + 52) << 20);	/* clear bit 20 of bits[1], then add the exponent field at bit 20 */
+		break;
+
+	  case STRTOG_Infinite:
+		L[_0] = 0x7ff00000;
+		L[_1] = 0;
+		break;
+
+	  case STRTOG_NaN:	/* copy the default NaN words in reverse index order */
+		L[_0] = NanDflt_d_D2A[1];
+		L[_1] = NanDflt_d_D2A[0];
+	  }
+	if (k & STRTOG_Neg)
+		L[_0] |= 0x80000000L;	/* set the top (sign) bit */
+	}
+
+ int	/* strtord: parse the text at s with the given rounding mode into *d; returns strtodg()'s result code */
+#ifdef KR_headers
+strtord(s, sp, rounding, d) CONST char *s; char **sp; int rounding; double *d;
+#else
+strtord(CONST char *s, char **sp, int rounding, double *d)
+#endif
+{
+	static FPI fpi0 = { 53, 1-1023-53+1, 2046-1023-53+1, 1, SI };	/* default format descriptor; presumably 53-bit IEEE double -- confirm against FPI in gdtoa.h */
+	FPI *fpi, fpi1;	/* fpi: descriptor actually used; fpi1: local copy for a non-default rounding mode */
+	ULong bits[2];	/* significand words from strtodg() */
+	Long exp;	/* exponent from strtodg() */
+	int k;	/* strtodg() result code, returned unchanged */
+
+	fpi = &fpi0;
+	if (rounding != FPI_Round_near) {	/* use a private copy so the static fpi0 is never modified */
+		fpi1 = fpi0;
+		fpi1.rounding = rounding;
+		fpi = &fpi1;
+		}
+	k = strtodg(s, sp, fpi, &exp, bits);
+	ULtod((ULong*)d, bits, exp, k);	/* pack bits/exp into *d according to k */
+	return k;
+	}
diff --git a/libraries/gdtoa/strtordd.c b/libraries/gdtoa/strtordd.c
new file mode 100644
index 000000000..62152dbd4
--- /dev/null
+++ b/libraries/gdtoa/strtordd.c
@@ -0,0 +1,202 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ extern ULong NanDflt_d_D2A[2];
+
+ void	/* ULtodd: pack a strtodg() result (bits, exp, class k) into a double-double stored as four ULong words at L */
+#ifdef KR_headers
+ULtodd(L, bits, exp, k) ULong *L; ULong *bits; Long exp; int k;
+#else
+ULtodd(ULong *L, ULong *bits, Long exp, int k)
+#endif
+{
+	int i, j;	/* shift counts */
+
+	switch(k & STRTOG_Retmask) {	/* dispatch on the result class */
+	  case STRTOG_NoNumber:
+	  case STRTOG_Zero:
+		L[0] = L[1] = L[2] = L[3] = 0;
+		break;
+
+	  case STRTOG_Normal:
+		L[_1] = (bits[1] >> 21 | bits[2] << 11) & (ULong)0xffffffffL;	/* first double: significand bits above the low 53 */
+		L[_0] = (bits[2] >> 21) | (bits[3] << 11 & 0xfffff)
+			  | ((exp + 0x3ff + 105) << 20);
+		exp += 0x3ff + 52;	/* starting exponent field for the second double */
+		if (bits[1] &= 0x1fffff) {	/* keep the low 21 bits of bits[1]; any set? (note: modifies the caller's bits[]) */
+			i = hi0bits(bits[1]) - 11;
+			if (i >= exp) {	/* shift limited by exp: exponent field becomes 0 */
+				i = exp - 1;
+				exp = 0;
+				}
+			else
+				exp -= i;
+			if (i > 0) {
+				bits[1] = bits[1] << i | bits[0] >> (32-i);
+				bits[0] = bits[0] << i & (ULong)0xffffffffL;
+				}
+			}
+		else if (bits[0]) {	/* remaining bits live only in bits[0] */
+			i = hi0bits(bits[0]) + 21;
+			if (i >= exp) {
+				i = exp - 1;
+				exp = 0;
+				}
+			else
+				exp -= i;
+			if (i < 32) {
+				bits[1] = bits[0] >> (32 - i);
+				bits[0] = bits[0] << i & (ULong)0xffffffffL;
+				}
+			else {
+				bits[1] = bits[0] << (i - 32);
+				bits[0] = 0;
+				}
+			}
+		else {	/* low 53 bits are all zero: second double is zero */
+			L[2] = L[3] = 0;
+			break;
+			}
+		L[2+_1] = bits[0];
+		L[2+_0] = (bits[1] & 0xfffff) | (exp << 20);
+		break;
+
+	  case STRTOG_Denormal:	/* pick a layout by the highest nonzero part of bits[] */
+		if (bits[3])
+			goto nearly_normal;
+		if (bits[2])
+			goto partly_normal;
+		if (bits[1] & 0xffe00000)
+			goto hardly_normal;
+		/* completely denormal */
+		L[2] = L[3] = 0;
+		L[_1] = bits[0];
+		L[_0] = bits[1];
+		break;
+
+	  nearly_normal:
+		i = hi0bits(bits[3]) - 11;	/* i >= 12 */
+		j = 32 - i;
+		L[_0] = ((bits[3] << i | bits[2] >> j) & 0xfffff)
+			| ((65 - i) << 20);
+		L[_1] = (bits[2] << i | bits[1] >> j) & 0xffffffffL;
+		L[2+_0] = bits[1] & (((ULong)1L << j) - 1);
+		L[2+_1] = bits[0];
+		break;
+
+	  partly_normal:
+		i = hi0bits(bits[2]) - 11;
+		if (i < 0) {
+			j = -i;
+			i += 32;
+			L[_0] = (bits[2] >> j & 0xfffff) | ((33 + j) << 20);
+			L[_1] = (bits[2] << i | bits[1] >> j) & 0xffffffffL;
+			L[2+_0] = bits[1] & (((ULong)1L << j) - 1);
+			L[2+_1] = bits[0];
+			break;
+			}
+		if (i == 0) {
+			L[_0] = (bits[2] & 0xfffff) | (33 << 20);
+			L[_1] = bits[1];
+			L[2+_0] = 0;
+			L[2+_1] = bits[0];
+			break;
+			}
+		j = 32 - i;
+		L[_0] = (((bits[2] << i) | (bits[1] >> j)) & 0xfffff)
+				| ((j + 1) << 20);
+		L[_1] = (bits[1] << i | bits[0] >> j) & 0xffffffffL;
+		L[2+_0] = 0;
+		L[2+_1] = bits[0] & (((ULong)1L << j) - 1);	/* unsigned shift like the sibling masks: j is 31 when i == 1, and 1L << 31 overflows a 32-bit long */
+		break;
+
+	  hardly_normal:
+		j = 11 - hi0bits(bits[1]);
+		i = 32 - j;
+		L[_0] = (bits[1] >> j & 0xfffff) | ((j + 1) << 20);
+		L[_1] = (bits[1] << i | bits[0] >> j) & 0xffffffffL;
+		L[2+_0] = 0;
+		L[2+_1] = bits[0] & (((ULong)1L << j) - 1);
+		break;
+
+	  case STRTOG_Infinite:
+		L[_0] = L[2+_0] = 0x7ff00000;
+		L[_1] = L[2+_1] = 0;
+		break;
+
+	  case STRTOG_NaN:	/* both doubles get the default NaN words */
+		L[_0] = L[_0+2] = NanDflt_d_D2A[1];
+		L[_1] = L[_1+2] = NanDflt_d_D2A[0];
+		break;
+
+	  case STRTOG_NaNbits:	/* split the payload across both doubles; an all-zero payload gets 0x7ff80000 instead of 0x7ff00000 */
+		L[_1] = (bits[1] >> 20 | bits[2] << 12) & (ULong)0xffffffffL;
+		L[_0] = bits[2] >> 20 | bits[3] << 12;
+		L[_0] |= (L[_1] | L[_0]) ? (ULong)0x7ff00000L : (ULong)0x7ff80000L;
+		L[2+_1] = bits[0] & (ULong)0xffffffffL;
+		L[2+_0] = bits[1] & 0xfffffL;
+		L[2+_0] |= (L[2+_1] | L[2+_0]) ? (ULong)0x7ff00000L : (ULong)0x7ff80000L;
+	  }
+	if (k & STRTOG_Neg) {	/* negative: set the sign bit of both doubles */
+		L[_0] |= 0x80000000L;
+		L[2+_0] |= 0x80000000L;
+		}
+	}
+
+ int	/* strtordd: parse the text at s with the given rounding mode into the double-double dd[0..1]; returns strtodg()'s result code */
+#ifdef KR_headers
+strtordd(s, sp, rounding, dd) CONST char *s; char **sp; int rounding; double *dd;
+#else
+strtordd(CONST char *s, char **sp, int rounding, double *dd)
+#endif
+{
+#ifdef Sudden_Underflow
+	static FPI fpi0 = { 106, 1-1023, 2046-1023-106+1, 1, 1 };
+#else
+	static FPI fpi0 = { 106, 1-1023-53+1, 2046-1023-106+1, 1, 0 };
+#endif
+	FPI *fpi, fpi1;	/* fpi: descriptor actually used; fpi1: local copy for a non-default rounding mode */
+	ULong bits[4];	/* significand words from strtodg() */
+	Long exp;	/* exponent from strtodg() */
+	int k;	/* strtodg() result code, returned unchanged */
+
+	fpi = &fpi0;
+	if (rounding != FPI_Round_near) {	/* use a private copy so the static fpi0 is never modified */
+		fpi1 = fpi0;
+		fpi1.rounding = rounding;
+		fpi = &fpi1;
+		}
+	k = strtodg(s, sp, fpi, &exp, bits);
+	ULtodd((ULong*)dd, bits, exp, k);	/* pack bits/exp into the two doubles according to k */
+	return k;
+	}
diff --git a/libraries/gdtoa/strtorf.c b/libraries/gdtoa/strtorf.c
new file mode 100644
index 000000000..99b4ab710
--- /dev/null
+++ b/libraries/gdtoa/strtorf.c
@@ -0,0 +1,91 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ extern ULong NanDflt_f_D2A[1];
+
+ void	/* ULtof: pack a strtodg() result (bits, exp, class k) into the single ULong word at L */
+#ifdef KR_headers
+ULtof(L, bits, exp, k) ULong *L; ULong *bits; Long exp; int k;
+#else
+ULtof(ULong *L, ULong *bits, Long exp, int k)
+#endif
+{
+	switch(k & STRTOG_Retmask) {	/* dispatch on the result class */
+	  case STRTOG_NoNumber:
+	  case STRTOG_Zero:
+		*L = 0;
+		break;
+
+	  case STRTOG_Normal:
+	  case STRTOG_NaNbits:
+		L[0] = (bits[0] & 0x7fffff) | ((exp + 0x7f + 23) << 23);	/* low 23 significand bits plus the exponent field at bit 23 */
+		break;
+
+	  case STRTOG_Denormal:
+		L[0] = bits[0];	/* stored as-is; no exponent field is added */
+		break;
+
+	  case STRTOG_Infinite:
+		L[0] = 0x7f800000;
+		break;
+
+	  case STRTOG_NaN:
+		L[0] = NanDflt_f_D2A[0];	/* default NaN word */
+	  }
+	if (k & STRTOG_Neg)
+		L[0] |= 0x80000000L;	/* set the top (sign) bit */
+	}
+
+ int	/* strtorf: parse the text at s with the given rounding mode into *f; returns strtodg()'s result code */
+#ifdef KR_headers
+strtorf(s, sp, rounding, f) CONST char *s; char **sp; int rounding; float *f;
+#else
+strtorf(CONST char *s, char **sp, int rounding, float *f)
+#endif
+{
+	static FPI fpi0 = { 24, 1-127-24+1,  254-127-24+1, 1, SI };	/* default format descriptor; presumably 24-bit IEEE single -- confirm against FPI in gdtoa.h */
+	FPI *fpi, fpi1;	/* fpi: descriptor actually used; fpi1: local copy for a non-default rounding mode */
+	ULong bits[1];	/* significand word from strtodg() */
+	Long exp;	/* exponent from strtodg() */
+	int k;	/* strtodg() result code, returned unchanged */
+
+	fpi = &fpi0;
+	if (rounding != FPI_Round_near) {	/* use a private copy so the static fpi0 is never modified */
+		fpi1 = fpi0;
+		fpi1.rounding = rounding;
+		fpi = &fpi1;
+		}
+	k = strtodg(s, sp, fpi, &exp, bits);
+	ULtof((ULong*)f, bits, exp, k);	/* pack bits/exp into *f according to k */
+	return k;
+	}
diff --git a/libraries/gdtoa/strtorx.c b/libraries/gdtoa/strtorx.c
new file mode 100644
index 000000000..994ce8e63
--- /dev/null
+++ b/libraries/gdtoa/strtorx.c
@@ -0,0 +1,122 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+#undef _0	/* discard any earlier _0/_1 before redefining them for five UShort words */
+#undef _1
+
+/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */
+
+#ifdef IEEE_MC68k
+#define _0 0	/* _0 indexes the word that ULtox() fills with the exponent and sign bit */
+#define _1 1
+#define _2 2
+#define _3 3
+#define _4 4	/* _4 indexes the word that receives the low 16 bits of bits[0] */
+#endif
+#ifdef IEEE_8087
+#define _0 4	/* same roles, reversed word order */
+#define _1 3
+#define _2 2
+#define _3 1
+#define _4 0
+#endif
+
+ extern UShort NanDflt_ldus_D2A[5];	/* five-word NaN pattern copied out by the STRTOG_NaN case of ULtox() */
+
+ void	/* ULtox: pack a strtodg() result (bits, exp, class k) into five UShort words at L */
+#ifdef KR_headers
+ULtox(L, bits, exp, k) UShort *L; ULong *bits; Long exp; int k;
+#else
+ULtox(UShort *L, ULong *bits, Long exp, int k)
+#endif
+{
+	switch(k & STRTOG_Retmask) {	/* dispatch on the result class */
+	  case STRTOG_NoNumber:
+	  case STRTOG_Zero:
+		L[0] = L[1] = L[2] = L[3] = L[4] = 0;
+		break;
+
+	  case STRTOG_Denormal:
+		L[_0] = 0;	/* exponent word zero; significand words filled at normal_bits */
+		goto normal_bits;
+
+	  case STRTOG_Normal:
+	  case STRTOG_NaNbits:
+		L[_0] = exp + 0x3fff + 63;	/* exponent word */
+ normal_bits:
+		L[_4] = (UShort)bits[0];	/* significand split into four 16-bit words, lowest in L[_4] */
+		L[_3] = (UShort)(bits[0] >> 16);
+		L[_2] = (UShort)bits[1];
+		L[_1] = (UShort)(bits[1] >> 16);
+		break;
+
+	  case STRTOG_Infinite:
+		L[_0] = 0x7fff;
+		L[_1] = 0x8000;	/* only the top bit of the highest significand word is set */
+		L[_2] = L[_3] = L[_4] = 0;
+		break;
+
+	  case STRTOG_NaN:	/* copy the default NaN words, element 0 into L[_4] up to element 4 into L[_0] */
+		L[_4] = NanDflt_ldus_D2A[0];
+		L[_3] = NanDflt_ldus_D2A[1];
+		L[_2] = NanDflt_ldus_D2A[2];
+		L[_1] = NanDflt_ldus_D2A[3];
+		L[_0] = NanDflt_ldus_D2A[4];
+	  }
+	if (k & STRTOG_Neg)
+		L[_0] |= 0x8000;	/* set the top (sign) bit of the exponent word */
+	}
+
+ int	/* strtorx: parse the text at s with the given rounding mode into five UShort words at L; returns strtodg()'s result code */
+#ifdef KR_headers
+strtorx(s, sp, rounding, L) CONST char *s; char **sp; int rounding; void *L;
+#else
+strtorx(CONST char *s, char **sp, int rounding, void *L)
+#endif
+{
+	static FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, SI };	/* default format descriptor; presumably 64-bit-significand extended -- confirm against FPI in gdtoa.h */
+	FPI *fpi, fpi1;	/* fpi: descriptor actually used; fpi1: local copy for a non-default rounding mode */
+	ULong bits[2];	/* significand words from strtodg() */
+	Long exp;	/* exponent from strtodg() */
+	int k;	/* strtodg() result code, returned unchanged */
+
+	fpi = &fpi0;
+	if (rounding != FPI_Round_near) {	/* use a private copy so the static fpi0 is never modified */
+		fpi1 = fpi0;
+		fpi1.rounding = rounding;
+		fpi = &fpi1;
+		}
+	k = strtodg(s, sp, fpi, &exp, bits);
+	ULtox((UShort*)L, bits, exp, k);	/* pack bits/exp into the output words according to k */
+	return k;
+	}
diff --git a/libraries/gdtoa/strtorxL.c b/libraries/gdtoa/strtorxL.c
new file mode 100644
index 000000000..bac4a0bb1
--- /dev/null
+++ b/libraries/gdtoa/strtorxL.c
@@ -0,0 +1,110 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 2000 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+#undef _0
+#undef _1
+
+/* one or the other of IEEE_MC68k or IEEE_8087 should be #defined */
+
+#ifdef IEEE_MC68k
+#define _0 0
+#define _1 1
+#define _2 2
+#endif
+#ifdef IEEE_8087
+#define _0 2
+#define _1 1
+#define _2 0
+#endif
+
+ extern ULong NanDflt_xL_D2A[3];
+
+ void
+#ifdef KR_headers
+ULtoxL(L, bits, exp, k) ULong *L; ULong *bits; Long exp; int k;
+#else
+ULtoxL(ULong *L, ULong *bits, Long exp, int k)
+#endif
+{
+	/* Three-word layout: _0 carries sign and exponent in its upper
+	 * 16 bits, _1 and _2 the high and low significand words. */
+	int kind = k & STRTOG_Retmask;
+
+	if (kind == STRTOG_NoNumber || kind == STRTOG_Zero) {
+		L[0] = L[1] = L[2] = 0;
+		}
+	else if (kind == STRTOG_Normal || kind == STRTOG_Denormal
+			|| kind == STRTOG_NaNbits) {
+		/* denormals use the same exponent formula as normal numbers */
+		L[_0] = (exp + 0x3fff + 63) << 16;
+		L[_1] = bits[1];
+		L[_2] = bits[0];
+		}
+	else if (kind == STRTOG_Infinite) {
+		L[_0] = 0x7fff0000;
+		L[_1] = 0x80000000;
+		L[_2] = 0;
+		}
+	else if (kind == STRTOG_NaN) {
+		/* default NaN words; the table's word 2 goes to the _0 slot */
+		L[_0] = NanDflt_xL_D2A[2];
+		L[_1] = NanDflt_xL_D2A[1];
+		L[_2] = NanDflt_xL_D2A[0];
+		}
+	if (k & STRTOG_Neg)
+		L[_0] |= 0x80000000L;
+	}
+
+ int
+#ifdef KR_headers
+strtorxL(s, sp, rounding, L) CONST char *s; char **sp; int rounding; void *L;
+#else
+strtorxL(CONST char *s, char **sp, int rounding, void *L)
+#endif
+{
+	/* shared descriptor; a private copy is made for non-default rounding */
+	static FPI fpi0 = { 64, 1-16383-64+1, 32766 - 16383 - 64 + 1, 1, SI };
+	FPI custom, *active = &fpi0;
+	ULong mant[2];
+	Long e;
+	int rc;
+
+	if (rounding != FPI_Round_near) {
+		custom = fpi0;
+		custom.rounding = rounding;
+		active = &custom;
+		}
+	rc = strtodg(s, sp, active, &e, mant);
+	ULtoxL((ULong*)L, mant, e, rc);
+	return rc;
+	}
diff --git a/libraries/gdtoa/sum.c b/libraries/gdtoa/sum.c
new file mode 100644
index 000000000..dc0c88bcf
--- /dev/null
+++ b/libraries/gdtoa/sum.c
@@ -0,0 +1,98 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ Bigint *
+#ifdef KR_headers
+sum(a, b) Bigint *a; Bigint *b;
+#else
+sum(Bigint *a, Bigint *b)
+#endif
+{	/* Return a Bigint from Balloc() holding the word-wise sum of a and b; the sign fields are not consulted. */
+	Bigint *c;
+	ULong carry, *xc, *xa, *xb, *xe, y;
+#ifdef Pack_32
+	ULong z;
+#endif
+
+	if (a->wds < b->wds) {
+		c = b; b = a; a = c;	/* swap so that a is the operand with more words */
+		}
+	c = Balloc(a->k);
+	c->wds = a->wds;
+	carry = 0;
+	xa = a->x;
+	xb = b->x;
+	xc = c->x;
+	xe = xc + b->wds;	/* end of the range where both operands contribute */
+#ifdef Pack_32
+	do {
+		y = (*xa & 0xffff) + (*xb & 0xffff) + carry;	/* low 16-bit halves */
+		carry = (y & 0x10000) >> 16;	/* carry out is bit 16 of the partial sum */
+		z = (*xa++ >> 16) + (*xb++ >> 16) + carry;	/* high 16-bit halves */
+		carry = (z & 0x10000) >> 16;
+		Storeinc(xc, z, y);	/* presumably stores z:y as one word and advances xc -- macro defined elsewhere */
+		}
+		while(xc < xe);
+	xe += a->wds - b->wds;	/* extend the end to cover a's remaining words */
+	while(xc < xe) {	/* only a contributes now: copy while propagating the carry */
+		y = (*xa & 0xffff) + carry;
+		carry = (y & 0x10000) >> 16;
+		z = (*xa++ >> 16) + carry;
+		carry = (z & 0x10000) >> 16;
+		Storeinc(xc, z, y);
+		}
+#else
+	do {
+		y = *xa++ + *xb++ + carry;
+		carry = (y & 0x10000) >> 16;	/* result words are masked to 16 bits on this path */
+		*xc++ = y & 0xffff;
+		}
+		while(xc < xe);
+	xe += a->wds - b->wds;	/* extend the end to cover a's remaining words */
+	while(xc < xe) {
+		y = *xa++ + carry;
+		carry = (y & 0x10000) >> 16;
+		*xc++ = y & 0xffff;
+		}
+#endif
+	if (carry) {	/* a final carry needs one more word */
+		if (c->wds == c->maxwds) {	/* no spare word: move the result to a Bigint with k + 1 */
+			b = Balloc(c->k + 1);
+			Bcopy(b, c);
+			Bfree(c);
+			c = b;
+			}
+		c->x[c->wds++] = 1;
+		}
+	return c;
+	}
diff --git a/libraries/gdtoa/ulp.c b/libraries/gdtoa/ulp.c
new file mode 100644
index 000000000..17e9f862c
--- /dev/null
+++ b/libraries/gdtoa/ulp.c
@@ -0,0 +1,70 @@
+/****************************************************************
+
+The author of this software is David M. Gay.
+
+Copyright (C) 1998, 1999 by Lucent Technologies
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name of Lucent or any of its entities
+not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
+****************************************************************/
+
+/* Please send bug reports to David M. Gay (dmg at acm dot org,
+ * with " at " changed to "@" and " dot " changed to ".").	*/
+
+#include "gdtoaimp.h"
+
+ double
+ulp
+#ifdef KR_headers
+	(x) U *x;
+#else
+	(U *x)
+#endif
+{	/* Return, as a double, one unit in the last place of *x. */
+	Long L;
+	U a;
+
+	L = (word0(x) & Exp_mask) - (P-1)*Exp_msk1;	/* x's exponent field lowered by P-1 */
+#ifndef Sudden_Underflow
+	if (L > 0) {	/* exponent field stays positive: exponent only, zero fraction */
+#endif
+#ifdef IBM
+		L |= Exp_msk1 >> 4;
+#endif
+		word0(&a) = L;
+		word1(&a) = 0;
+#ifndef Sudden_Underflow
+		}
+	else {	/* exponent field would be <= 0: encode the result as a single fraction bit */
+		L = -L >> Exp_shift;
+		if (L < Exp_shift) {
+			word0(&a) = 0x80000 >> L;	/* the bit still fits in the high word */
+			word1(&a) = 0;
+			}
+		else {
+			word0(&a) = 0;
+			L -= Exp_shift;
+			word1(&a) = L >= 31 ? 1 : (ULong)1 << (31 - L);	/* unsigned shift: 1 << 31 would overflow int when L == 0 */
+			}
+		}
+#endif
+	return dval(&a);
+	}
diff --git a/libraries/jpeg/CMakeLists.txt b/libraries/jpeg/CMakeLists.txt
new file mode 100644
index 000000000..33a3938cf
--- /dev/null
+++ b/libraries/jpeg/CMakeLists.txt
@@ -0,0 +1,36 @@
+cmake_minimum_required( VERSION 2.8.7 )
+
+make_release_only()	# not a built-in CMake command; defined elsewhere in the project
+
+if( ZD_CMAKE_COMPILER_IS_GNUC_COMPATIBLE )	# extra warning/optimization flags for GCC-compatible compilers only
+	set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wno-unused-parameter -fomit-frame-pointer" )
+endif()
+
+add_library( jpeg STATIC	# static library built from the sources listed below
+    jaricom.c
+    jcomapi.c
+    jdapimin.c
+    jdapistd.c
+    jdarith.c
+    jdatasrc.c
+    jdcoefct.c
+    jdcolor.c
+    jddctmgr.c
+    jdhuff.c
+    jdinput.c
+    jdmainct.c
+    jdmarker.c
+    jdmaster.c
+    jdmerge.c
+    jdpostct.c
+    jdsample.c
+    jerror.c
+    jidctflt.c
+    jidctfst.c
+    jidctint.c
+    jmemansi.c
+    jmemmgr.c
+    jquant1.c
+    jquant2.c
+    jutils.c )
+target_link_libraries( jpeg )	# no link dependencies are listed for this target
diff --git a/libraries/jpeg/README b/libraries/jpeg/README
new file mode 100644
index 000000000..56cdb6003
--- /dev/null
+++ b/libraries/jpeg/README
@@ -0,0 +1,378 @@
+The Independent JPEG Group's JPEG software
+==========================================
+
+README for release 9c of 14-Jan-2018
+====================================
+
+This distribution contains the ninth public release of the Independent JPEG
+Group's free JPEG software.  You are welcome to redistribute this software and
+to use it for any purpose, subject to the conditions under LEGAL ISSUES, below.
+
+This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone,
+Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson,
+Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, Ge' Weijers,
+and other members of the Independent JPEG Group.
+
+IJG is not affiliated with the ISO/IEC JTC1/SC29/WG1 standards committee
+(previously known as JPEG, together with ITU-T SG16).
+
+
+DOCUMENTATION ROADMAP
+=====================
+
+This file contains the following sections:
+
+OVERVIEW            General description of JPEG and the IJG software.
+LEGAL ISSUES        Copyright, lack of warranty, terms of distribution.
+REFERENCES          Where to learn more about JPEG.
+ARCHIVE LOCATIONS   Where to find newer versions of this software.
+ACKNOWLEDGMENTS     Special thanks.
+FILE FORMAT WARS    Software *not* to get.
+TO DO               Plans for future IJG releases.
+
+Other documentation files in the distribution are:
+
+User documentation:
+  install.txt       How to configure and install the IJG software.
+  usage.txt         Usage instructions for cjpeg, djpeg, jpegtran,
+                    rdjpgcom, and wrjpgcom.
+  *.1               Unix-style man pages for programs (same info as usage.txt).
+  wizard.txt        Advanced usage instructions for JPEG wizards only.
+  change.log        Version-to-version change highlights.
+Programmer and internal documentation:
+  libjpeg.txt       How to use the JPEG library in your own programs.
+  example.c         Sample code for calling the JPEG library.
+  structure.txt     Overview of the JPEG library's internal structure.
+  filelist.txt      Road map of IJG files.
+  coderules.txt     Coding style rules --- please read if you contribute code.
+
+Please read at least the files install.txt and usage.txt.  Some information
+can also be found in the JPEG FAQ (Frequently Asked Questions) article.  See
+ARCHIVE LOCATIONS below to find out where to obtain the FAQ article.
+
+If you want to understand how the JPEG code works, we suggest reading one or
+more of the REFERENCES, then looking at the documentation files (in roughly
+the order listed) before diving into the code.
+
+
+OVERVIEW
+========
+
+This package contains C software to implement JPEG image encoding, decoding,
+and transcoding.  JPEG (pronounced "jay-peg") is a standardized compression
+method for full-color and grayscale images.
+
+This software implements JPEG baseline, extended-sequential, and progressive
+compression processes.  Provision is made for supporting all variants of these
+processes, although some uncommon parameter settings aren't implemented yet.
+We have made no provision for supporting the hierarchical or lossless
+processes defined in the standard.
+
+We provide a set of library routines for reading and writing JPEG image files,
+plus two sample applications "cjpeg" and "djpeg", which use the library to
+perform conversion between JPEG and some other popular image file formats.
+The library is intended to be reused in other applications.
+
+In order to support file conversion and viewing software, we have included
+considerable functionality beyond the bare JPEG coding/decoding capability;
+for example, the color quantization modules are not strictly part of JPEG
+decoding, but they are essential for output to colormapped file formats or
+colormapped displays.  These extra functions can be compiled out of the
+library if not required for a particular application.
+
+We have also included "jpegtran", a utility for lossless transcoding between
+different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple
+applications for inserting and extracting textual comments in JFIF files.
+
+The emphasis in designing this software has been on achieving portability and
+flexibility, while also making it fast enough to be useful.  In particular,
+the software is not intended to be read as a tutorial on JPEG.  (See the
+REFERENCES section for introductory material.)  Rather, it is intended to
+be reliable, portable, industrial-strength code.  We do not claim to have
+achieved that goal in every aspect of the software, but we strive for it.
+
+We welcome the use of this software as a component of commercial products.
+No royalty is required, but we do ask for an acknowledgement in product
+documentation, as described under LEGAL ISSUES.
+
+
+LEGAL ISSUES
+============
+
+In plain English:
+
+1. We don't promise that this software works.  (But if you find any bugs,
+   please let us know!)
+2. You can use this software for whatever you want.  You don't have to pay us.
+3. You may not pretend that you wrote this software.  If you use it in a
+   program, you must acknowledge somewhere in your documentation that
+   you've used the IJG code.
+
+In legalese:
+
+The authors make NO WARRANTY or representation, either express or implied,
+with respect to this software, its quality, accuracy, merchantability, or
+fitness for a particular purpose.  This software is provided "AS IS", and you,
+its user, assume the entire risk as to its quality and accuracy.
+
+This software is copyright (C) 1991-2018, Thomas G. Lane, Guido Vollbeding.
+All Rights Reserved except as specified below.
+
+Permission is hereby granted to use, copy, modify, and distribute this
+software (or portions thereof) for any purpose, without fee, subject to these
+conditions:
+(1) If any part of the source code for this software is distributed, then this
+README file must be included, with this copyright and no-warranty notice
+unaltered; and any additions, deletions, or changes to the original files
+must be clearly indicated in accompanying documentation.
+(2) If only executable code is distributed, then the accompanying
+documentation must state that "this software is based in part on the work of
+the Independent JPEG Group".
+(3) Permission for use of this software is granted only if the user accepts
+full responsibility for any undesirable consequences; the authors accept
+NO LIABILITY for damages of any kind.
+
+These conditions apply to any software derived from or based on the IJG code,
+not just to the unmodified library.  If you use our work, you ought to
+acknowledge us.
+
+Permission is NOT granted for the use of any IJG author's name or company name
+in advertising or publicity relating to this software or products derived from
+it.  This software may be referred to only as "the Independent JPEG Group's
+software".
+
+We specifically permit and encourage the use of this software as the basis of
+commercial products, provided that all warranty or liability claims are
+assumed by the product vendor.
+
+
+The Unix configuration script "configure" was produced with GNU Autoconf.
+It is copyright by the Free Software Foundation but is freely distributable.
+The same holds for its supporting scripts (config.guess, config.sub,
+ltmain.sh).  Another support script, install-sh, is copyright by X Consortium
+but is also freely distributable.
+
+The IJG distribution formerly included code to read and write GIF files.
+To avoid entanglement with the Unisys LZW patent (now expired), GIF reading
+support has been removed altogether, and the GIF writer has been simplified
+to produce "uncompressed GIFs".  This technique does not use the LZW
+algorithm; the resulting GIF files are larger than usual, but are readable
+by all standard GIF decoders.
+
+
+REFERENCES
+==========
+
+We recommend reading one or more of these references before trying to
+understand the innards of the JPEG software.
+
+The best short technical introduction to the JPEG compression algorithm is
+	Wallace, Gregory K.  "The JPEG Still Picture Compression Standard",
+	Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44.
+(Adjacent articles in that issue discuss MPEG motion picture compression,
+applications of JPEG, and related topics.)  If you don't have the CACM issue
+handy, a PDF file containing a revised version of Wallace's article is
+available at http://www.ijg.org/files/Wallace.JPEG.pdf.  The file (actually
+a preprint for an article that appeared in IEEE Trans. Consumer Electronics)
+omits the sample images that appeared in CACM, but it includes corrections
+and some added material.  Note: the Wallace article is copyright ACM and IEEE,
+and it may not be used for commercial purposes.
+
+A somewhat less technical, more leisurely introduction to JPEG can be found in
+"The Data Compression Book" by Mark Nelson and Jean-loup Gailly, published by
+M&T Books (New York), 2nd ed. 1996, ISBN 1-55851-434-1.  This book provides
+good explanations and example C code for a multitude of compression methods
+including JPEG.  It is an excellent source if you are comfortable reading C
+code but don't know much about data compression in general.  The book's JPEG
+sample code is far from industrial-strength, but when you are ready to look
+at a full implementation, you've got one here...
+
+The best currently available description of JPEG is the textbook "JPEG Still
+Image Data Compression Standard" by William B. Pennebaker and Joan L.
+Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1.
+Price US$59.95, 638 pp.  The book includes the complete text of the ISO JPEG
+standards (DIS 10918-1 and draft DIS 10918-2).
+Although this is by far the most detailed and comprehensive exposition of
+JPEG publicly available, we point out that it is still missing an explanation
+of the most essential properties and algorithms of the underlying DCT
+technology.
+If you think that you know about DCT-based JPEG after reading this book,
+then you are in delusion.  The real fundamentals and corresponding potential
+of DCT-based JPEG are not publicly known so far, and that is the reason for
+all the mistaken developments taking place in the image coding domain.
+
+The original JPEG standard is divided into two parts, Part 1 being the actual
+specification, while Part 2 covers compliance testing methods.  Part 1 is
+titled "Digital Compression and Coding of Continuous-tone Still Images,
+Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS
+10918-1, ITU-T T.81.  Part 2 is titled "Digital Compression and Coding of
+Continuous-tone Still Images, Part 2: Compliance testing" and has document
+numbers ISO/IEC IS 10918-2, ITU-T T.83.
+IJG JPEG 8 introduced an implementation of the JPEG SmartScale extension
+which is specified in two documents:  A contributed document at ITU and ISO
+with title "ITU-T JPEG-Plus Proposal for Extending ITU-T T.81 for Advanced
+Image Coding", April 2006, Geneva, Switzerland.  The latest version of this
+document is Revision 3.  And a contributed document ISO/IEC JTC1/SC29/WG1 N
+5799 with title "Evolution of JPEG", June/July 2011, Berlin, Germany.
+IJG JPEG 9 introduces a reversible color transform for improved lossless
+compression which is described in a contributed document ISO/IEC JTC1/SC29/
+WG1 N 6080 with title "JPEG 9 Lossless Coding", June/July 2012, Paris,
+France.
+
+The JPEG standard does not specify all details of an interchangeable file
+format.  For the omitted details we follow the "JFIF" conventions, version 2.
+JFIF version 1 has been adopted as Recommendation ITU-T T.871 (05/2011) :
+Information technology - Digital compression and coding of continuous-tone
+still images: JPEG File Interchange Format (JFIF).  It is available as a
+free download in PDF file format from http://www.itu.int/rec/T-REC-T.871.
+A PDF file of the older JFIF document is available at
+http://www.w3.org/Graphics/JPEG/jfif3.pdf.
+
+The TIFF 6.0 file format specification can be obtained by FTP from
+ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz.  The JPEG incorporation scheme
+found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems.
+IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6).
+Instead, we recommend the JPEG design proposed by TIFF Technical Note #2
+(Compression tag 7).  Copies of this Note can be obtained from
+http://www.ijg.org/files/.  It is expected that the next revision
+of the TIFF spec will replace the 6.0 JPEG design with the Note's design.
+Although IJG's own code does not support TIFF/JPEG, the free libtiff library
+uses our library to implement TIFF/JPEG per the Note.
+
+
+ARCHIVE LOCATIONS
+=================
+
+The "official" archive site for this software is www.ijg.org.
+The most recent released version can always be found there in
+directory "files".  This particular version will be archived as
+http://www.ijg.org/files/jpegsrc.v9c.tar.gz, and in Windows-compatible
+"zip" archive format as http://www.ijg.org/files/jpegsr9c.zip.
+
+The JPEG FAQ (Frequently Asked Questions) article is a source of some
+general information about JPEG.
+It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/
+and other news.answers archive sites, including the official news.answers
+archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/.
+If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu
+with body
+	send usenet/news.answers/jpeg-faq/part1
+	send usenet/news.answers/jpeg-faq/part2
+
+
+ACKNOWLEDGMENTS
+===============
+
+Thanks to Juergen Bruder for providing me with a copy of the common DCT
+algorithm article, only to find out that I had come to the same result
+in a more direct and comprehensible way with a more generative approach.
+
+Thanks to Istvan Sebestyen and Joan L. Mitchell for inviting me to the
+ITU JPEG (Study Group 16) meeting in Geneva, Switzerland.
+
+Thanks to Thomas Wiegand and Gary Sullivan for inviting me to the
+Joint Video Team (MPEG & ITU) meeting in Geneva, Switzerland.
+
+Thanks to Thomas Richter and Daniel Lee for inviting me to the
+ISO/IEC JTC1/SC29/WG1 (previously known as JPEG, together with ITU-T SG16)
+meeting in Berlin, Germany.
+
+Thanks to John Korejwa and Massimo Ballerini for inviting me to
+fruitful consultations in Boston, MA and Milan, Italy.
+
+Thanks to Hendrik Elstner, Roland Fassauer, Simone Zuck, Guenther
+Maier-Gerber, Walter Stoeber, Fred Schmitz, and Norbert Braunagel
+for corresponding business development.
+
+Thanks to Nico Zschach and Dirk Stelling of the technical support team
+at the Digital Images company in Halle for providing me with extra
+equipment for configuration tests.
+
+Thanks to Richard F. Lyon (then of Foveon Inc.) for fruitful
+communication about JPEG configuration in Sigma Photo Pro software.
+
+Thanks to Andrew Finkenstadt for hosting the ijg.org site.
+
+Thanks to Thomas G. Lane for the original design and development of
+this singular software package.
+
+Thanks to Lars Goehler, Andreas Heinecke, Sebastian Fuss, Yvonne Roebert,
+Andrej Werner, and Ulf-Dietrich Braumann for support and public relations.
+
+
+FILE FORMAT WARS
+================
+
+The ISO/IEC JTC1/SC29/WG1 standards committee (previously known as JPEG,
+together with ITU-T SG16) currently promotes different formats containing
+the name "JPEG" which is misleading because these formats are incompatible
+with original DCT-based JPEG and are based on faulty technologies.
+IJG therefore does not and will not support such momentary mistakes
+(see REFERENCES).
+There exist also distributions under the name "OpenJPEG" promoting such
+kind of formats which is misleading because they don't support original
+JPEG images.
+We have no sympathy for the promotion of inferior formats.  Indeed, one of
+the original reasons for developing this free software was to help force
+convergence on common, interoperable format standards for JPEG files.
+Don't use an incompatible file format!
+(In any case, our decoder will remain capable of reading existing JPEG
+image files indefinitely.)
+
+The ISO committee pretends to be "responsible for the popular JPEG" in their
+public reports which is not true because they don't respond to actual
+requirements for the maintenance of the original JPEG specification.
+Furthermore, the ISO committee pretends to "ensure interoperability" with
+their standards which is not true because their "standards" support only
+application-specific and proprietary use cases and contain mathematically
+incorrect code.
+
+There are currently different distributions in circulation containing the
+name "libjpeg" which is misleading because they don't have the features and
+are incompatible with formats supported by actual IJG libjpeg distributions.
+One of those fakes is released by members of the ISO committee and just uses
+the name of libjpeg for misdirection of people, similar to the abuse of the
+name JPEG as described above, while having nothing in common with actual IJG
+libjpeg distributions and containing mathematically incorrect code.
+The other one claims to be a "derivative" or "fork" of the original libjpeg,
+but violates the license conditions as described under LEGAL ISSUES above
+and violates basic C programming properties.
+We have no sympathy for the release of misleading, incorrect and illegal
+distributions derived from obsolete code bases.
+Don't use an obsolete code base!
+
+According to the UCC (Uniform Commercial Code) law, IJG has the lawful and
+legal right to foreclose on certain standardization bodies and other
+institutions or corporations that knowingly perform substantial and
+systematic deceptive acts and practices, fraud, theft, and damaging of the
+value of the people of this planet without their knowing, willing and
+intentional consent.
+The titles, ownership, and rights of these institutions and all their assets
+are now duly secured and held in trust for the free people of this planet.
+People of the planet, in every country, may have a financial interest in
+the assets of these former principals, agents, and beneficiaries of the
+foreclosed institutions and corporations.
+IJG asserts what is: that each man, woman, and child has unalienable value
+and rights granted and deposited in them by the Creator and not any one of
+the people is subordinate to any artificial principality, corporate fiction
+or the special interest of another without their appropriate knowing,
+willing and intentional consent made by contract or accommodation agreement.
+IJG expresses that which already was.
+The people have already determined and demanded that public administration
+entities, national governments, and their supporting judicial systems must
+be fully transparent, accountable, and liable.
+IJG has secured the value for all concerned free people of the planet.
+
+A partial list of foreclosed institutions and corporations ("Hall of Shame")
+is currently prepared and will be published later.
+
+
+TO DO
+=====
+
+Version 9 is the second release of a new generation JPEG standard
+to overcome the limitations of the original JPEG specification,
+and is the first true source reference JPEG codec.
+More features are being prepared for coming releases...
+
+Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org.
diff --git a/libraries/jpeg/jaricom.c b/libraries/jpeg/jaricom.c
new file mode 100644
index 000000000..690068861
--- /dev/null
+++ b/libraries/jpeg/jaricom.c
@@ -0,0 +1,153 @@
+/*
+ * jaricom.c
+ *
+ * Developed 1997-2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains probability estimation tables for common use in
+ * arithmetic entropy encoding and decoding routines.
+ *
+ * This data represents Table D.3 in the JPEG spec (D.2 in the draft),
+ * ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81, and Table 24
+ * in the JBIG spec, ISO/IEC IS 11544 and CCITT Recommendation ITU-T T.82.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+/* V() packs one table row into a compact INT32: Qe_Value (a) from bit 16
+ * upward, Next_Index_MPS (c) from bit 8, Switch_MPS (d) in bit 7 and
+ * Next_Index_LPS (b) in the low bits; the row index (i) is not stored.
+ * Note that this formula must match the actual arithmetic encoder
+ * and decoder implementation, which has to be changed if this formula
+ * is changed.  The current organization is modeled on Markus Kuhn's
+ * JBIG implementation (jbig_tab.c).
+ */
+
+#define V(i,a,b,c,d) (((INT32)(a) << 16) | ((INT32)(c) << 8) | ((INT32)(d) << 7) | (b))
+
+const INT32 jpeg_aritab[113+1] = {	/* entries 0..112 plus one fixed-probability entry (113) */
+/* One row per probability-estimation state; the V() arguments are:
+ * Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS
+ */
+  V(   0, 0x5a1d,   1,   1, 1 ),
+  V(   1, 0x2586,  14,   2, 0 ),
+  V(   2, 0x1114,  16,   3, 0 ),
+  V(   3, 0x080b,  18,   4, 0 ),
+  V(   4, 0x03d8,  20,   5, 0 ),
+  V(   5, 0x01da,  23,   6, 0 ),
+  V(   6, 0x00e5,  25,   7, 0 ),
+  V(   7, 0x006f,  28,   8, 0 ),
+  V(   8, 0x0036,  30,   9, 0 ),
+  V(   9, 0x001a,  33,  10, 0 ),
+  V(  10, 0x000d,  35,  11, 0 ),
+  V(  11, 0x0006,   9,  12, 0 ),
+  V(  12, 0x0003,  10,  13, 0 ),
+  V(  13, 0x0001,  12,  13, 0 ),
+  V(  14, 0x5a7f,  15,  15, 1 ),
+  V(  15, 0x3f25,  36,  16, 0 ),
+  V(  16, 0x2cf2,  38,  17, 0 ),
+  V(  17, 0x207c,  39,  18, 0 ),
+  V(  18, 0x17b9,  40,  19, 0 ),
+  V(  19, 0x1182,  42,  20, 0 ),
+  V(  20, 0x0cef,  43,  21, 0 ),
+  V(  21, 0x09a1,  45,  22, 0 ),
+  V(  22, 0x072f,  46,  23, 0 ),
+  V(  23, 0x055c,  48,  24, 0 ),
+  V(  24, 0x0406,  49,  25, 0 ),
+  V(  25, 0x0303,  51,  26, 0 ),
+  V(  26, 0x0240,  52,  27, 0 ),
+  V(  27, 0x01b1,  54,  28, 0 ),
+  V(  28, 0x0144,  56,  29, 0 ),
+  V(  29, 0x00f5,  57,  30, 0 ),
+  V(  30, 0x00b7,  59,  31, 0 ),
+  V(  31, 0x008a,  60,  32, 0 ),
+  V(  32, 0x0068,  62,  33, 0 ),
+  V(  33, 0x004e,  63,  34, 0 ),
+  V(  34, 0x003b,  32,  35, 0 ),
+  V(  35, 0x002c,  33,   9, 0 ),
+  V(  36, 0x5ae1,  37,  37, 1 ),
+  V(  37, 0x484c,  64,  38, 0 ),
+  V(  38, 0x3a0d,  65,  39, 0 ),
+  V(  39, 0x2ef1,  67,  40, 0 ),
+  V(  40, 0x261f,  68,  41, 0 ),
+  V(  41, 0x1f33,  69,  42, 0 ),
+  V(  42, 0x19a8,  70,  43, 0 ),
+  V(  43, 0x1518,  72,  44, 0 ),
+  V(  44, 0x1177,  73,  45, 0 ),
+  V(  45, 0x0e74,  74,  46, 0 ),
+  V(  46, 0x0bfb,  75,  47, 0 ),
+  V(  47, 0x09f8,  77,  48, 0 ),
+  V(  48, 0x0861,  78,  49, 0 ),
+  V(  49, 0x0706,  79,  50, 0 ),
+  V(  50, 0x05cd,  48,  51, 0 ),
+  V(  51, 0x04de,  50,  52, 0 ),
+  V(  52, 0x040f,  50,  53, 0 ),
+  V(  53, 0x0363,  51,  54, 0 ),
+  V(  54, 0x02d4,  52,  55, 0 ),
+  V(  55, 0x025c,  53,  56, 0 ),
+  V(  56, 0x01f8,  54,  57, 0 ),
+  V(  57, 0x01a4,  55,  58, 0 ),
+  V(  58, 0x0160,  56,  59, 0 ),
+  V(  59, 0x0125,  57,  60, 0 ),
+  V(  60, 0x00f6,  58,  61, 0 ),
+  V(  61, 0x00cb,  59,  62, 0 ),
+  V(  62, 0x00ab,  61,  63, 0 ),
+  V(  63, 0x008f,  61,  32, 0 ),
+  V(  64, 0x5b12,  65,  65, 1 ),
+  V(  65, 0x4d04,  80,  66, 0 ),
+  V(  66, 0x412c,  81,  67, 0 ),
+  V(  67, 0x37d8,  82,  68, 0 ),
+  V(  68, 0x2fe8,  83,  69, 0 ),
+  V(  69, 0x293c,  84,  70, 0 ),
+  V(  70, 0x2379,  86,  71, 0 ),
+  V(  71, 0x1edf,  87,  72, 0 ),
+  V(  72, 0x1aa9,  87,  73, 0 ),
+  V(  73, 0x174e,  72,  74, 0 ),
+  V(  74, 0x1424,  72,  75, 0 ),
+  V(  75, 0x119c,  74,  76, 0 ),
+  V(  76, 0x0f6b,  74,  77, 0 ),
+  V(  77, 0x0d51,  75,  78, 0 ),
+  V(  78, 0x0bb6,  77,  79, 0 ),
+  V(  79, 0x0a40,  77,  48, 0 ),
+  V(  80, 0x5832,  80,  81, 1 ),
+  V(  81, 0x4d1c,  88,  82, 0 ),
+  V(  82, 0x438e,  89,  83, 0 ),
+  V(  83, 0x3bdd,  90,  84, 0 ),
+  V(  84, 0x34ee,  91,  85, 0 ),
+  V(  85, 0x2eae,  92,  86, 0 ),
+  V(  86, 0x299a,  93,  87, 0 ),
+  V(  87, 0x2516,  86,  71, 0 ),
+  V(  88, 0x5570,  88,  89, 1 ),
+  V(  89, 0x4ca9,  95,  90, 0 ),
+  V(  90, 0x44d9,  96,  91, 0 ),
+  V(  91, 0x3e22,  97,  92, 0 ),
+  V(  92, 0x3824,  99,  93, 0 ),
+  V(  93, 0x32b4,  99,  94, 0 ),
+  V(  94, 0x2e17,  93,  86, 0 ),
+  V(  95, 0x56a8,  95,  96, 1 ),
+  V(  96, 0x4f46, 101,  97, 0 ),
+  V(  97, 0x47e5, 102,  98, 0 ),
+  V(  98, 0x41cf, 103,  99, 0 ),
+  V(  99, 0x3c3d, 104, 100, 0 ),
+  V( 100, 0x375e,  99,  93, 0 ),
+  V( 101, 0x5231, 105, 102, 0 ),
+  V( 102, 0x4c0f, 106, 103, 0 ),
+  V( 103, 0x4639, 107, 104, 0 ),
+  V( 104, 0x415e, 103,  99, 0 ),
+  V( 105, 0x5627, 105, 106, 1 ),
+  V( 106, 0x50e7, 108, 107, 0 ),
+  V( 107, 0x4b85, 109, 103, 0 ),
+  V( 108, 0x5597, 110, 109, 0 ),
+  V( 109, 0x504f, 111, 107, 0 ),
+  V( 110, 0x5a10, 110, 111, 1 ),
+  V( 111, 0x5522, 112, 109, 0 ),
+  V( 112, 0x59eb, 112, 111, 1 ),
+/*
+ * This last entry is used for fixed probability estimate of 0.5
+ * as suggested in Section 10.3 Table 5 of ITU-T Rec. T.851.
+ */
+  V( 113, 0x5a1d, 113, 113, 0 )
+};
diff --git a/libraries/jpeg/jcomapi.c b/libraries/jpeg/jcomapi.c
new file mode 100644
index 000000000..9b1fa7568
--- /dev/null
+++ b/libraries/jpeg/jcomapi.c
@@ -0,0 +1,106 @@
+/*
+ * jcomapi.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface routines that are used for both
+ * compression and decompression.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Abort processing of a JPEG compression or decompression operation,
+ * but don't destroy the object itself.
+ *
+ * For this, we merely clean up all the nonpermanent memory pools.
+ * Note that temp files (virtual arrays) are not allowed to belong to
+ * the permanent pool, so we will be able to close all temp files here.
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.  Unless cinfo->mem is NULL, global_state ends at its START value.
+ */
+
+GLOBAL(void)
+jpeg_abort (j_common_ptr cinfo)
+{
+  int pool;			/* pool id, walked from the highest one downwards */
+
+  /* Do nothing if called on a not-initialized or destroyed JPEG object. */
+  if (cinfo->mem == NULL)
+    return;
+
+  /* Releasing pools in reverse order might help avoid fragmentation
+   * with some (brain-damaged) malloc libraries.
+   */
+  for (pool = JPOOL_NUMPOOLS-1; pool > JPOOL_PERMANENT; pool--) { /* loop ends before JPOOL_PERMANENT, which is kept */
+    (*cinfo->mem->free_pool) (cinfo, pool);
+  }
+
+  /* Reset overall state for possible reuse of object */
+  if (cinfo->is_decompressor) {
+    cinfo->global_state = DSTATE_START;
+    /* Try to keep application from accessing now-deleted marker list.
+     * A bit kludgy to do it here, but this is the most central place.
+     */
+    ((j_decompress_ptr) cinfo)->marker_list = NULL;
+  } else {
+    cinfo->global_state = CSTATE_START;	/* not a decompressor: compression start state */
+  }
+}
+
+
+/*
+ * Destruction of a JPEG object.
+ *
+ * Everything gets deallocated except the master jpeg_compress_struct itself
+ * and the error manager struct.  Both of these are supplied by the application
+ * and must be freed, if necessary, by the application.  (Often they are on
+ * the stack and so don't need to be freed anyway.)
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.  A repeated call skips self_destruct because mem is already NULL.
+ */
+
+GLOBAL(void)
+jpeg_destroy (j_common_ptr cinfo)
+{
+  /* We need only tell the memory manager to release everything. */
+  /* NB: mem pointer is NULL if memory mgr failed to initialize. */
+  if (cinfo->mem != NULL)
+    (*cinfo->mem->self_destruct) (cinfo);
+  cinfo->mem = NULL;		/* be safe if jpeg_destroy is called twice */
+  cinfo->global_state = 0;	/* mark it destroyed; done even when mem was already NULL */
+}
+
+
+/*
+ * Convenience routines for allocating quantization and Huffman tables.
+ * (Would jutils.c be a more reasonable place to put these?)
+ */
+
+GLOBAL(JQUANT_TBL *)
+jpeg_alloc_quant_table (j_common_ptr cinfo)
+{
+  JQUANT_TBL *tbl;		/* new table, requested from JPOOL_PERMANENT */
+
+  tbl = (JQUANT_TBL *)
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JQUANT_TBL));
+  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
+  return tbl;			/* sent_table is the only field this routine sets */
+}
+
+
+GLOBAL(JHUFF_TBL *)
+jpeg_alloc_huff_table (j_common_ptr cinfo)
+{
+  JHUFF_TBL *tbl;		/* new table, requested from JPOOL_PERMANENT */
+
+  tbl = (JHUFF_TBL *)
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JHUFF_TBL));
+  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
+  return tbl;			/* sent_table is the only field this routine sets */
+}
diff --git a/libraries/jpeg/jconfig.h b/libraries/jpeg/jconfig.h
new file mode 100644
index 000000000..347e6cb0f
--- /dev/null
+++ b/libraries/jpeg/jconfig.h
@@ -0,0 +1,32 @@
+/* jconfig.vc --- jconfig.h for Microsoft Visual C++ on Windows 95 or NT. */
+/* see jconfig.doc for explanations.  NOTE(review): this is the Visual C++ variant installed as jconfig.h -- confirm it suits every compiler this tree targets. */
+
+#define HAVE_PROTOTYPES
+#define HAVE_UNSIGNED_CHAR
+#define HAVE_UNSIGNED_SHORT
+/* #define void char */
+/* #define const */
+#undef CHAR_IS_UNSIGNED
+#define HAVE_STDDEF_H
+#define HAVE_STDLIB_H
+#undef NEED_BSD_STRINGS
+#undef NEED_SYS_TYPES_H
+
+/* Define "boolean" as unsigned char, not int, per Windows custom */
+#ifndef __RPCNDR_H__		/* don't conflict if rpcndr.h already read */
+typedef unsigned char boolean;
+#endif
+#define HAVE_BOOLEAN		/* prevent jmorecfg.h from redefining it */
+
+#ifndef FALSE			/* keep an earlier definition of FALSE if there is one */
+#define FALSE 0
+#endif
+#ifndef TRUE			/* keep an earlier definition of TRUE if there is one */
+#define TRUE 1
+#endif
+
+#ifdef JPEG_INTERNALS		/* only for files that define JPEG_INTERNALS before including this header */
+
+#undef RIGHT_SHIFT_IS_UNSIGNED
+
+#endif /* JPEG_INTERNALS */
diff --git a/libraries/jpeg/jdapimin.c b/libraries/jpeg/jdapimin.c
new file mode 100644
index 000000000..a6e0dd9fb
--- /dev/null
+++ b/libraries/jpeg/jdapimin.c
@@ -0,0 +1,399 @@
+/*
+ * jdapimin.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Modified 2009-2013 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the decompression half
+ * of the JPEG library.  These are the "minimum" API routines that may be
+ * needed in either the normal full-decompression case or the
+ * transcoding-only case.
+ *
+ * Most of the routines intended to be called directly by an application
+ * are in this file or in jdapistd.c.  But also see jcomapi.c for routines
+ * shared by compression and decompression, and jdtrans.c for the transcoding
+ * case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Initialization of a JPEG decompression object; the last step sets DSTATE_START.
+ * The error manager must already be set up (in case memory manager fails).
+ */
+
+GLOBAL(void)
+jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize)
+{
+  int i;			/* index over the table-pointer arrays */
+
+  /* Guard against version mismatches between library and caller. */
+  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+  if (version != JPEG_LIB_VERSION)	/* version: the library version the caller passed in */
+    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
+  if (structsize != SIZEOF(struct jpeg_decompress_struct))	/* structsize: the caller's idea of the struct size */
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
+	     (int) SIZEOF(struct jpeg_decompress_struct), (int) structsize);
+
+  /* For debugging purposes, we zero the whole master structure.
+   * But the application has already set the err pointer, and may have set
+   * client_data, so we have to save and restore those fields.
+   * Note: if application hasn't set client_data, tools like Purify may
+   * complain here.
+   */
+  {
+    struct jpeg_error_mgr * err = cinfo->err;
+    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
+    MEMZERO(cinfo, SIZEOF(struct jpeg_decompress_struct));
+    cinfo->err = err;
+    cinfo->client_data = client_data;
+  }
+  cinfo->is_decompressor = TRUE;	/* set after the zeroing above, which would otherwise clear it */
+
+  /* Initialize a memory manager instance for this object */
+  jinit_memory_mgr((j_common_ptr) cinfo);
+
+  /* Zero out pointers to permanent structures. */
+  cinfo->progress = NULL;
+  cinfo->src = NULL;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++)
+    cinfo->quant_tbl_ptrs[i] = NULL;
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {	/* DC and AC arrays share the same slot count */
+    cinfo->dc_huff_tbl_ptrs[i] = NULL;
+    cinfo->ac_huff_tbl_ptrs[i] = NULL;
+  }
+
+  /* Initialize marker processor so application can override methods
+   * for COM, APPn markers before calling jpeg_read_header.
+   */
+  cinfo->marker_list = NULL;
+  jinit_marker_reader(cinfo);
+
+  /* And initialize the overall input controller. */
+  jinit_input_controller(cinfo);
+
+  /* OK, I'm ready */
+  cinfo->global_state = DSTATE_START;
+}
+
+
+/*
+ * Destruction of a JPEG decompression object (forwards to jpeg_destroy).
+ */
+
+GLOBAL(void)
+jpeg_destroy_decompress (j_decompress_ptr cinfo)
+{
+  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Abort processing of a JPEG decompression operation,
+ * but don't destroy the object itself (forwards to jpeg_abort).
+ */
+
+GLOBAL(void)
+jpeg_abort_decompress (j_decompress_ptr cinfo)
+{
+  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Set default decompression parameters (colorspace guess plus option defaults).
+ */
+
+LOCAL(void)
+default_decompress_parms (j_decompress_ptr cinfo)
+{
+  int cid0, cid1, cid2;		/* component ids, read only in the 3-component case */
+
+  /* Guess the input colorspace, and set output colorspace accordingly. */
+  /* Note application may override our guesses. */
+  switch (cinfo->num_components) {
+  case 1:			/* single component: grayscale in and out */
+    cinfo->jpeg_color_space = JCS_GRAYSCALE;
+    cinfo->out_color_space = JCS_GRAYSCALE;
+    break;
+    
+  case 3:			/* ids first, then JFIF marker, then Adobe marker, else YCbCr */
+    cid0 = cinfo->comp_info[0].component_id;
+    cid1 = cinfo->comp_info[1].component_id;
+    cid2 = cinfo->comp_info[2].component_id;
+
+    /* First try to guess from the component IDs */
+    if      (cid0 == 0x01 && cid1 == 0x02 && cid2 == 0x03)
+      cinfo->jpeg_color_space = JCS_YCbCr;
+    else if (cid0 == 0x01 && cid1 == 0x22 && cid2 == 0x23)
+      cinfo->jpeg_color_space = JCS_BG_YCC;
+    else if (cid0 == 0x52 && cid1 == 0x47 && cid2 == 0x42)
+      cinfo->jpeg_color_space = JCS_RGB;	/* ASCII 'R', 'G', 'B' */
+    else if (cid0 == 0x72 && cid1 == 0x67 && cid2 == 0x62)
+      cinfo->jpeg_color_space = JCS_BG_RGB;	/* ASCII 'r', 'g', 'b' */
+    else if (cinfo->saw_JFIF_marker)
+      cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
+    else if (cinfo->saw_Adobe_marker) {
+      switch (cinfo->Adobe_transform) {
+      case 0:
+	cinfo->jpeg_color_space = JCS_RGB;
+	break;
+      case 1:
+	cinfo->jpeg_color_space = JCS_YCbCr;
+	break;
+      default:			/* unrecognized transform code: warn, then fall back */
+	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+	cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
+	break;
+      }
+    } else {
+      TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
+      cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
+    }
+    /* Always guess RGB is proper output colorspace. */
+    cinfo->out_color_space = JCS_RGB;
+    break;
+    
+  case 4:			/* Adobe marker decides between CMYK and YCCK */
+    if (cinfo->saw_Adobe_marker) {
+      switch (cinfo->Adobe_transform) {
+      case 0:
+	cinfo->jpeg_color_space = JCS_CMYK;
+	break;
+      case 2:
+	cinfo->jpeg_color_space = JCS_YCCK;
+	break;
+      default:			/* unrecognized transform code: warn, then fall back */
+	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+	cinfo->jpeg_color_space = JCS_YCCK;	/* assume it's YCCK */
+	break;
+      }
+    } else {
+      /* No special markers, assume straight CMYK. */
+      cinfo->jpeg_color_space = JCS_CMYK;
+    }
+    cinfo->out_color_space = JCS_CMYK;
+    break;
+    
+  default:			/* any other component count */
+    cinfo->jpeg_color_space = JCS_UNKNOWN;
+    cinfo->out_color_space = JCS_UNKNOWN;
+    break;
+  }
+
+  /* Set defaults for other decompression parameters. */
+  cinfo->scale_num = cinfo->block_size;		/* 1:1 scaling */
+  cinfo->scale_denom = cinfo->block_size;
+  cinfo->output_gamma = 1.0;
+  cinfo->buffered_image = FALSE;
+  cinfo->raw_data_out = FALSE;
+  cinfo->dct_method = JDCT_DEFAULT;
+  cinfo->do_fancy_upsampling = TRUE;
+  cinfo->do_block_smoothing = TRUE;
+  cinfo->quantize_colors = FALSE;
+  /* We set these in case application only sets quantize_colors. */
+  cinfo->dither_mode = JDITHER_FS;
+#ifdef QUANT_2PASS_SUPPORTED
+  cinfo->two_pass_quantize = TRUE;
+#else
+  cinfo->two_pass_quantize = FALSE;
+#endif
+  cinfo->desired_number_of_colors = 256;
+  cinfo->colormap = NULL;
+  /* Initialize for no mode change in buffered-image mode. */
+  cinfo->enable_1pass_quant = FALSE;
+  cinfo->enable_external_quant = FALSE;
+  cinfo->enable_2pass_quant = FALSE;
+}
+
+
+/*
+ * Decompression startup: read start of JPEG datastream to see what's there.
+ * Need only initialize JPEG object and supply a data source before calling.
+ *
+ * This routine will read as far as the first SOS marker (ie, actual start of
+ * compressed data), and will save all tables and parameters in the JPEG
+ * object.  It will also initialize the decompression parameters to default
+ * values, and finally return JPEG_HEADER_OK.  On return, the application may
+ * adjust the decompression parameters and then call jpeg_start_decompress.
+ * (Or, if the application only wanted to determine the image parameters,
+ * the data need not be decompressed.  In that case, call jpeg_abort or
+ * jpeg_destroy to release any temporary space.)
+ * If an abbreviated (tables only) datastream is presented, the routine will
+ * return JPEG_HEADER_TABLES_ONLY upon reaching EOI.  The application may then
+ * re-use the JPEG object to read the abbreviated image datastream(s).
+ * It is unnecessary (but OK) to call jpeg_abort in this case.
+ * The JPEG_SUSPENDED return code only occurs if the data source module
+ * requests suspension of the decompressor.  In this case the application
+ * should load more source data and then re-call jpeg_read_header to resume
+ * processing.
+ * If a non-suspending data source is used and require_image is TRUE, then the
+ * return code need not be inspected since only JPEG_HEADER_OK is possible.
+ *
+ * This routine is now just a front end to jpeg_consume_input, with some
+ * extra error checking.
+ */
+
+GLOBAL(int)
+jpeg_read_header (j_decompress_ptr cinfo, boolean require_image)
+{
+  int retcode;			/* code from jpeg_consume_input, remapped below */
+
+  if (cinfo->global_state != DSTATE_START &&
+      cinfo->global_state != DSTATE_INHEADER)	/* only these two states are accepted */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  retcode = jpeg_consume_input(cinfo);
+
+  switch (retcode) {
+  case JPEG_REACHED_SOS:
+    retcode = JPEG_HEADER_OK;
+    break;
+  case JPEG_REACHED_EOI:
+    if (require_image)		/* Complain if application wanted an image */
+      ERREXIT(cinfo, JERR_NO_IMAGE);
+    /* Reset to start state; it would be safer to require the application to
+     * call jpeg_abort, but we can't change it now for compatibility reasons.
+     * A side effect is to free any temporary memory (there shouldn't be any).
+     */
+    jpeg_abort((j_common_ptr) cinfo); /* sets state = DSTATE_START */
+    retcode = JPEG_HEADER_TABLES_ONLY;
+    break;
+  case JPEG_SUSPENDED:
+    /* no work */
+    break;
+  }
+
+  return retcode;		/* the switch has no default: other codes pass through unchanged */
+}
+
+
+/*
+ * Consume data in advance of what the decompressor requires.
+ * This can be called at any time once the decompressor object has
+ * been created and a data source has been set up.
+ *
+ * This routine is essentially a state machine that handles a couple
+ * of critical state-transition actions, namely initial setup and
+ * transition from header scanning to ready-for-start_decompress.
+ * All the actual input is done via the input controller's consume_input
+ * method.
+ */
+
+GLOBAL(int)
+jpeg_consume_input (j_decompress_ptr cinfo)
+{
+  int retcode = JPEG_SUSPENDED;	/* overwritten by every case except default */
+
+  /* NB: every possible DSTATE value should be listed in this switch */
+  switch (cinfo->global_state) {
+  case DSTATE_START:
+    /* Start-of-datastream actions: reset appropriate modules */
+    (*cinfo->inputctl->reset_input_controller) (cinfo);
+    /* Initialize application's data source module */
+    (*cinfo->src->init_source) (cinfo);	/* dereferences cinfo->src with no NULL check */
+    cinfo->global_state = DSTATE_INHEADER;
+    /*FALLTHROUGH*/
+  case DSTATE_INHEADER:
+    retcode = (*cinfo->inputctl->consume_input) (cinfo);
+    if (retcode == JPEG_REACHED_SOS) { /* Found SOS, prepare to decompress */
+      /* Set up default parameters based on header data */
+      default_decompress_parms(cinfo);
+      /* Set global state: ready for start_decompress */
+      cinfo->global_state = DSTATE_READY;
+    }
+    break;
+  case DSTATE_READY:
+    /* Can't advance past first SOS until start_decompress is called */
+    retcode = JPEG_REACHED_SOS;
+    break;
+  case DSTATE_PRELOAD:		/* all states below just forward to the input controller */
+  case DSTATE_PRESCAN:
+  case DSTATE_SCANNING:
+  case DSTATE_RAW_OK:
+  case DSTATE_BUFIMAGE:
+  case DSTATE_BUFPOST:
+  case DSTATE_STOPPING:
+    retcode = (*cinfo->inputctl->consume_input) (cinfo);
+    break;
+  default:			/* a state value not listed above */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  return retcode;
+}
+
+
+/*
+ * Have we finished reading the input file?
+ */
+
+GLOBAL(boolean)
+jpeg_input_complete (j_decompress_ptr cinfo)
+{
+  /* Check for valid jpeg object */
+  if (cinfo->global_state < DSTATE_START ||
+      cinfo->global_state > DSTATE_STOPPING)	/* numeric range test on the DSTATE_* codes */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return cinfo->inputctl->eoi_reached;	/* flag kept by the input controller */
+}
+
+
+/*
+ * Is there more than one scan?
+ */
+
+GLOBAL(boolean)
+jpeg_has_multiple_scans (j_decompress_ptr cinfo)
+{
+  /* Only valid after jpeg_read_header completes */
+  if (cinfo->global_state < DSTATE_READY ||
+      cinfo->global_state > DSTATE_STOPPING)	/* numeric range test on the DSTATE_* codes */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return cinfo->inputctl->has_multiple_scans;	/* flag kept by the input controller */
+}
+
+
+/*
+ * Finish JPEG decompression.
+ *
+ * This will normally just verify the file trailer and release temp storage.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_finish_decompress (j_decompress_ptr cinfo)
+{
+  if ((cinfo->global_state == DSTATE_SCANNING ||
+       cinfo->global_state == DSTATE_RAW_OK) && ! cinfo->buffered_image) {
+    /* Terminate final pass of non-buffered mode */
+    if (cinfo->output_scanline < cinfo->output_height)	/* caller has not read every output row */
+      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
+    (*cinfo->master->finish_output_pass) (cinfo);
+    cinfo->global_state = DSTATE_STOPPING;
+  } else if (cinfo->global_state == DSTATE_BUFIMAGE) {
+    /* Finishing after a buffered-image operation */
+    cinfo->global_state = DSTATE_STOPPING;
+  } else if (cinfo->global_state != DSTATE_STOPPING) {
+    /* STOPPING = repeat call after a suspension, anything else is error */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Read until EOI */
+  while (! cinfo->inputctl->eoi_reached) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return FALSE;		/* Suspend, come back later */
+  }
+  /* Do final cleanup */
+  (*cinfo->src->term_source) (cinfo);	/* called before jpeg_abort below */
+  /* We can use jpeg_abort to release memory and reset global_state */
+  jpeg_abort((j_common_ptr) cinfo);
+  return TRUE;
+}
diff --git a/libraries/jpeg/jdapistd.c b/libraries/jpeg/jdapistd.c
new file mode 100644
index 000000000..7f3a78b25
--- /dev/null
+++ b/libraries/jpeg/jdapistd.c
@@ -0,0 +1,276 @@
+/*
+ * jdapistd.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2002-2013 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the decompression half
+ * of the JPEG library.  These are the "standard" API routines that are
+ * used in the normal full-decompression case.  They are not used by a
+ * transcoding-only application.  Note that if an application links in
+ * jpeg_start_decompress, it will end up linking in the entire decompressor.
+ * We thus must separate this file from jdapimin.c to avoid linking the
+ * whole decompression library into a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations */
+LOCAL(boolean) output_pass_setup JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Decompression initialization.
+ * jpeg_read_header must be completed before calling this.
+ *
+ * If a multipass operating mode was selected, this will do all but the
+ * last pass, and thus may take a great deal of time.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_start_decompress (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize master control, select active modules */
+    jinit_master_decompress(cinfo);
+    if (cinfo->buffered_image) {
+      /* No more work here; expecting jpeg_start_output next */
+      cinfo->global_state = DSTATE_BUFIMAGE;
+      return TRUE;
+    }
+    cinfo->global_state = DSTATE_PRELOAD;
+  }
+  if (cinfo->global_state == DSTATE_PRELOAD) {
+    /* If file has multiple scans, absorb them all into the coef buffer */
+    if (cinfo->inputctl->has_multiple_scans) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+      for (;;) {
+	int retcode;		/* result of one consume_input step */
+	/* Call progress monitor hook if present */
+	if (cinfo->progress != NULL)
+	  (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+	/* Absorb some more input */
+	retcode = (*cinfo->inputctl->consume_input) (cinfo);
+	if (retcode == JPEG_SUSPENDED)	/* global_state is still DSTATE_PRELOAD here */
+	  return FALSE;
+	if (retcode == JPEG_REACHED_EOI)
+	  break;
+	/* Advance progress counter if appropriate */
+	if (cinfo->progress != NULL &&
+	    (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+	  if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	    /* jdmaster underestimated number of scans; ratchet up one scan */
+	    cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+	  }
+	}
+      }
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+    }
+    cinfo->output_scan_number = cinfo->input_scan_number;
+  } else if (cinfo->global_state != DSTATE_PRESCAN)	/* PRESCAN is the state output_pass_setup leaves on suspension */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any dummy output passes, and set up for the final pass */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Set up for an output pass, and perform any dummy pass(es) needed.
+ * Common subroutine for jpeg_start_decompress and jpeg_start_output.
+ * Entry: global_state = DSTATE_PRESCAN only if previously suspended.
+ * Exit: If done, returns TRUE and sets global_state for proper output mode.
+ *       If suspended, returns FALSE and sets global_state = DSTATE_PRESCAN.
+ */
+
+LOCAL(boolean)
+output_pass_setup (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state != DSTATE_PRESCAN) {
+    /* First call: do pass setup */
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+    cinfo->global_state = DSTATE_PRESCAN;
+  }
+  /* Loop over any required dummy passes */
+  while (cinfo->master->is_dummy_pass) {
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Crank through the dummy pass */
+    while (cinfo->output_scanline < cinfo->output_height) {
+      JDIMENSION last_scanline;	/* output_scanline as it was before process_data ran */
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL) {
+	cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+	cinfo->progress->pass_limit = (long) cinfo->output_height;
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      }
+      /* Process some data */
+      last_scanline = cinfo->output_scanline;	/* the call below gets a NULL buffer and a row limit of 0 */
+      (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL,
+				    &cinfo->output_scanline, (JDIMENSION) 0);
+      if (cinfo->output_scanline == last_scanline)
+	return FALSE;		/* No progress made, must suspend */
+    }
+    /* Finish up dummy pass, and set up for another one */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  }
+  /* Ready for application to drive output pass through
+   * jpeg_read_scanlines or jpeg_read_raw_data.
+   */
+  cinfo->global_state = cinfo->raw_data_out ? DSTATE_RAW_OK : DSTATE_SCANNING;
+  return TRUE;
+}
+
+
+/*
+ * Read some scanlines of data from the JPEG decompressor.
+ *
+ * The return value will be the number of lines actually read.
+ * This may be less than the number requested in several cases,
+ * including bottom of image, data source suspension, and operating
+ * modes that emit multiple scanlines at a time.
+ *
+ * Note: we warn about excess calls to jpeg_read_scanlines() since
+ * this likely signals an application programmer error.  However,
+ * an oversize buffer (max_lines > scanlines remaining) is not an error.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+		     JDIMENSION max_lines)
+{
+  JDIMENSION row_ctr;		/* rows produced by this call; becomes the return value */
+
+  if (cinfo->global_state != DSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->output_scanline >= cinfo->output_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;			/* all rows were already delivered: warn only */
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->output_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Process some data */
+  row_ctr = 0;
+  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, max_lines);
+  cinfo->output_scanline += row_ctr;	/* advance by however many rows were produced */
+  return row_ctr;
+}
+
+
+/*
+ * Alternate entry point to read raw data.
+ * Processes exactly one iMCU row per call, unless suspended.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
+		    JDIMENSION max_lines)
+{
+  JDIMENSION lines_per_iMCU_row;	/* max_v_samp_factor * min_DCT_v_scaled_size, computed below */
+
+  if (cinfo->global_state != DSTATE_RAW_OK)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->output_scanline >= cinfo->output_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;			/* all rows were already delivered: warn only */
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->output_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Verify that at least one iMCU row can be returned. */
+  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size;
+  if (max_lines < lines_per_iMCU_row)	/* caller's buffer holds fewer rows than one iMCU row */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Decompress directly into user's buffer. */
+  if (! (*cinfo->coef->decompress_data) (cinfo, data))
+    return 0;			/* suspension forced, can do nothing more */
+
+  /* OK, we processed one iMCU row. */
+  cinfo->output_scanline += lines_per_iMCU_row;
+  return lines_per_iMCU_row;
+}
+
+
+/* Additional entry points for buffered-image mode. */
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Initialize for an output pass in buffered-image mode.
+ */
+
+GLOBAL(boolean)
+jpeg_start_output (j_decompress_ptr cinfo, int scan_number)
+{
+  if (cinfo->global_state != DSTATE_BUFIMAGE &&
+      cinfo->global_state != DSTATE_PRESCAN)	/* only these two states are accepted */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Limit scan number to valid range */
+  if (scan_number <= 0)		/* values below 1 are raised to 1 */
+    scan_number = 1;
+  if (cinfo->inputctl->eoi_reached &&
+      scan_number > cinfo->input_scan_number)	/* upper clamp applies only once EOI has been reached */
+    scan_number = cinfo->input_scan_number;
+  cinfo->output_scan_number = scan_number;
+  /* Perform any dummy output passes, and set up for the real pass */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Finish up after an output pass in buffered-image mode.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_finish_output (j_decompress_ptr cinfo)
+{
+  if ((cinfo->global_state == DSTATE_SCANNING ||
+       cinfo->global_state == DSTATE_RAW_OK) && cinfo->buffered_image) {
+    /* Terminate this pass. */
+    /* We do not require the whole pass to have been completed. */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    cinfo->global_state = DSTATE_BUFPOST;
+  } else if (cinfo->global_state != DSTATE_BUFPOST) {
+    /* BUFPOST = repeat call after a suspension, anything else is error */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Read markers looking for SOS or EOI */
+  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+	 ! cinfo->inputctl->eoi_reached) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return FALSE;		/* Suspend, come back later */
+  }
+  cinfo->global_state = DSTATE_BUFIMAGE;	/* one of the states jpeg_start_output accepts */
+  return TRUE;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
diff --git a/libraries/jpeg/jdarith.c b/libraries/jpeg/jdarith.c
new file mode 100644
index 000000000..5533c0739
--- /dev/null
+++ b/libraries/jpeg/jdarith.c
@@ -0,0 +1,796 @@
+/*
+ * jdarith.c
+ *
+ * Developed 1997-2015 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains portable arithmetic entropy decoding routines for JPEG
+ * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
+ *
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Suspension is not currently supported in this module.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Expanded entropy decoder object for arithmetic decoding. */
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields; cinfo->entropy points here */
+
+  INT32 c;       /* C register, base of coding interval + input bit buffer */
+  INT32 a;               /* A register, normalized size of coding interval */
+  int ct;     /* bit shift counter, # of bits left in bit buffer part of C */
+                                      /* init: ct = -16 (2 bytes still to load) */
+                                                         /* run: ct = 0..7 */
+                              /* error: ct = -1 (decode_mcu* then do nothing) */
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
+
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+
+  /* Pointers to statistics areas (these workspaces have image lifespan) */
+  unsigned char * dc_stats[NUM_ARITH_TBLS]; /* DC_STAT_BINS bytes, or NULL */
+  unsigned char * ac_stats[NUM_ARITH_TBLS]; /* AC_STAT_BINS bytes, or NULL */
+
+  /* Statistics bin for coding with fixed probability 0.5; only [0] is set */
+  unsigned char fixed_bin[4];
+} arith_entropy_decoder;
+
+typedef arith_entropy_decoder * arith_entropy_ptr;
+
+/* The following two definitions specify the allocation chunk size
+ * for the statistics area.
+ * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
+ * 49 statistics bins for DC, and 245 statistics bins for AC coding.
+ *
+ * We use a compact representation with 1 byte per statistics bin,
+ * thus the numbers directly represent byte sizes.
+ * This 1 byte per statistics bin contains the meaning of the MPS
+ * (more probable symbol) in the highest bit (mask 0x80), and the
+ * index into the probability estimation state machine table
+ * in the lower bits (mask 0x7F).
+ */
+
+#define DC_STAT_BINS 64
+#define AC_STAT_BINS 256
+
+
+LOCAL(int)
+get_byte (j_decompress_ptr cinfo)
+/* Return the next input byte; a suspending data source is a fatal error. */
+{
+  struct jpeg_source_mgr * in = cinfo->src;
+
+  if (in->bytes_in_buffer == 0 && ! (*in->fill_input_buffer) (cinfo))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+  in->bytes_in_buffer -= 1;
+  in->next_input_byte += 1;
+  return GETJOCTET(in->next_input_byte[-1]);
+}
+
+
+/*
+ * The core arithmetic decoding routine (common in JPEG and JBIG).
+ * This needs to go as fast as possible.
+ * Machine-dependent optimization facilities
+ * are not utilized in this portable implementation.
+ * However, this code should be fairly efficient and
+ * may be a good base for further optimizations anyway.
+ *
+ * Return value is 0 or 1 (binary decision).
+ *
+ * Note: I've changed the handling of the code base & bit
+ * buffer register C compared to other implementations
+ * based on the standards layout & procedures.
+ * While it also contains both the actual base of the
+ * coding interval (16 bits) and the next-bits buffer,
+ * the cut-point between these two parts is floating
+ * (instead of fixed) with the bit shift counter CT.
+ * Thus, we also need only one (variable instead of
+ * fixed size) shift for the LPS/MPS decision, and
+ * we can do away with any renormalization update
+ * of C (except for new data insertion, of course).
+ *
+ * I've also introduced a new scheme for accessing
+ * the probability estimation state machine table,
+ * derived from Markus Kuhn's JBIG implementation.
+ */
+
+LOCAL(int)
+arith_decode (j_decompress_ptr cinfo, unsigned char *st)
+{
+  register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
+  register unsigned char nl, nm;	/* next state index after LPS / MPS */
+  register INT32 qe, temp;
+  register int sv, data;		/* state byte *st; next input byte */
+
+  /* Renormalization & data input per section D.2.6 */
+  while (e->a < 0x8000L) {
+    if (--e->ct < 0) {
+      /* Need to fetch next data byte */
+      if (cinfo->unread_marker)
+	data = 0;		/* marker already pending: stuff zero data */
+      else {
+	data = get_byte(cinfo);	/* read next input byte */
+	if (data == 0xFF) {	/* zero stuff or marker code */
+	  do data = get_byte(cinfo);
+	  while (data == 0xFF);	/* swallow extra 0xFF bytes */
+	  if (data == 0)
+	    data = 0xFF;	/* discard stuffed zero byte */
+	  else {
+	    /* Note: Different from the Huffman decoder, hitting
+	     * a marker while processing the compressed data
+	     * segment is legal in arithmetic coding.
+	     * The convention is to supply zero data
+	     * then until decoding is complete.
+	     */
+	    cinfo->unread_marker = data;
+	    data = 0;
+	  }
+	}
+      }
+      e->c = (e->c << 8) | data; /* insert data into C register */
+      if ((e->ct += 8) < 0)	 /* update bit shift counter */
+	/* Need more initial bytes */
+	if (++e->ct == 0)
+	  /* Got 2 initial bytes -> re-init A and exit loop */
+	  e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */
+    }
+    e->a <<= 1;
+  }
+
+  /* Fetch values from our compact representation of Table D.3(D.2):
+   * Qe values and probability estimation state machine
+   */
+  sv = *st;			/* MPS sense in bit 0x80, table index below */
+  qe = jpeg_aritab[sv & 0x7F];	/* => Qe_Value */
+  nl = qe & 0xFF; qe >>= 8;	/* Next_Index_LPS + Switch_MPS */
+  nm = qe & 0xFF; qe >>= 8;	/* Next_Index_MPS */
+
+  /* Decode & estimation procedures per sections D.2.4 & D.2.5 */
+  temp = e->a - qe;
+  e->a = temp;
+  temp <<= e->ct;		/* shift by CT to line up with the base in C */
+  if (e->c >= temp) {
+    e->c -= temp;
+    /* Conditional LPS (less probable symbol) exchange */
+    if (e->a < qe) {
+      e->a = qe;
+      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
+    } else {
+      e->a = qe;
+      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
+      sv ^= 0x80;		/* Exchange LPS/MPS */
+    }
+  } else if (e->a < 0x8000L) {
+    /* Conditional MPS (more probable symbol) exchange */
+    if (e->a < qe) {
+      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
+      sv ^= 0x80;		/* Exchange LPS/MPS */
+    } else {
+      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
+    }
+  }
+
+  return sv >> 7;		/* bit 7 of sv is the decoded decision */
+}
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ */
+
+LOCAL(void)
+process_restart (j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
+  jpeg_component_info * comp;
+  int dc_active, ac_active;
+  int ci;
+
+  /* Step over the RSTn marker; suspension is not supported here */
+  if (! (*cinfo->marker->read_restart_marker) (cinfo))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+
+  /* Which kinds of statistics area this scan uses (same for all components) */
+  dc_active = ! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0);
+  ac_active = cinfo->progressive_mode ? cinfo->Ss != 0 : cinfo->lim_Se != 0;
+
+  /* Clear the statistics bins of every table the scan's components use */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    comp = cinfo->cur_comp_info[ci];
+    if (dc_active) {
+      MEMZERO(e->dc_stats[comp->dc_tbl_no], DC_STAT_BINS);
+      e->last_dc_val[ci] = 0;	/* DC prediction starts over at 0 */
+      e->dc_context[ci] = 0;
+    }
+    if (ac_active)
+      MEMZERO(e->ac_stats[comp->ac_tbl_no], AC_STAT_BINS);
+  }
+
+  /* Decoder registers and restart counter go back to their initial state */
+  e->c = 0;
+  e->a = 0;
+  e->ct = -16;	/* forces 2 initial bytes to be read into C */
+  e->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Arithmetic MCU decoding.
+ * Each of these routines decodes and returns one MCU's worth of
+ * arithmetic-compressed coefficients.
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
+ */
+
+/*
+ * MCU decoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl, sign;
+  int v, m;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+
+  /* Outer loop handles each block in the MCU; TRUE is returned in all cases */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
+
+    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.19: Decode_DC_DIFF */
+    if (arith_decode(cinfo, st) == 0)
+      entropy->dc_context[ci] = 0;
+    else {
+      /* Figures F.21 & F.22: Decoding nonzero value v, sign first */
+      sign = arith_decode(cinfo, st + 1);
+      st += 2; st += sign;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
+	while (arith_decode(cinfo, st)) {
+	  if ((m <<= 1) == 0x8000) {
+	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	    entropy->ct = -1;			/* magnitude overflow */
+	    return TRUE;
+	  }
+	  st += 1;
+	}
+      }
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+	entropy->dc_context[ci] = 0;		   /* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
+      else
+	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	if (arith_decode(cinfo, st)) v |= m;
+      v += 1; if (sign) v = -v;
+      entropy->last_dc_val[ci] += v;
+    }
+
+    /* Scale and output the DC coefficient (assumes jpeg_natural_order[0]=0).
+     * Shift the unsigned representation: last_dc_val can be negative, and
+     * left-shifting a negative int is undefined behavior. */
+    (*block)[0] = (JCOEF) ((unsigned int) entropy->last_dc_val[ci] << cinfo->Al);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, sign, k;
+  int v, m;
+  const int * natural_order;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+
+  natural_order = cinfo->natural_order;
+
+  /* There is always only one block per MCU */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
+
+  /* Figure F.20: Decode_AC_coefficients */
+  k = cinfo->Ss - 1;
+  do {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    if (arith_decode(cinfo, st)) break;		/* EOB flag */
+    for (;;) {
+      k++;
+      if (arith_decode(cinfo, st + 1)) break;
+      st += 3;
+      if (k >= cinfo->Se) {
+	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	entropy->ct = -1;			/* spectral overflow */
+	return TRUE;
+      }
+    }
+    /* Figures F.21 & F.22: Decoding nonzero value v, sign first */
+    sign = arith_decode(cinfo, entropy->fixed_bin);
+    st += 2;
+    /* Figure F.23: Decoding the magnitude category of v */
+    if ((m = arith_decode(cinfo, st)) != 0) {
+      if (arith_decode(cinfo, st)) {
+	m <<= 1;
+	st = entropy->ac_stats[tbl] +
+	     (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+	while (arith_decode(cinfo, st)) {
+	  if ((m <<= 1) == 0x8000) {
+	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	    entropy->ct = -1;			/* magnitude overflow */
+	    return TRUE;
+	  }
+	  st += 1;
+	}
+      }
+    }
+    v = m;
+    /* Figure F.24: Decoding the magnitude bit pattern of v */
+    st += 14;
+    while (m >>= 1)
+      if (arith_decode(cinfo, st)) v |= m;
+    v += 1; if (sign) v = -v;
+    /* Scale (as unsigned: v may be negative, and << on a negative int is
+     * undefined) and output coefficient in natural (dezigzagged) order */
+    (*block)[natural_order[k]] = (JCOEF) ((unsigned int) v << cinfo->Al);
+  } while (k < cinfo->Se);
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
+  const int bit = 1 << cinfo->Al;	/* bit position refined by this scan */
+  JBLOCKROW *blk = MCU_data;
+  int left;
+
+  /* Restart bookkeeping: resynchronize once the interval is used up */
+  if (cinfo->restart_interval != 0) {
+    if (e->restarts_to_go == 0)
+      process_restart(cinfo);
+    e->restarts_to_go -= 1;
+  }
+
+  /* Every block contributes exactly one binary decision, decoded with the
+   * fixed-probability bin rather than an adaptive one.  A decoded 1 sets
+   * the current bit position in the block's DC coefficient; a 0 leaves
+   * the coefficient as it was.
+   */
+  for (left = cinfo->blocks_in_MCU; left > 0; left--, blk++) {
+    if (arith_decode(cinfo, e->fixed_bin) != 0)
+      (**blk)[0] |= bit;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  JCOEFPTR thiscoef;
+  unsigned char *st;
+  int tbl, k, kex;
+  int p1, m1;
+  const int * natural_order;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+
+  natural_order = cinfo->natural_order;
+
+  /* There is always only one block per MCU */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
+  m1 = -p1;	/* -1 in that position; (-1) << Al would be undefined behavior */
+
+  /* Establish EOBx (previous stage end-of-block) index */
+  kex = cinfo->Se;
+  do {
+    if ((*block)[natural_order[kex]]) break;
+  } while (--kex);
+
+  k = cinfo->Ss - 1;
+  do {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    if (k >= kex)	/* the EOB decision is only decoded at or past EOBx */
+      if (arith_decode(cinfo, st)) break;	/* EOB flag */
+    for (;;) {
+      thiscoef = *block + natural_order[++k];
+      if (*thiscoef) {				/* previously nonzero coef */
+	if (arith_decode(cinfo, st + 2)) {
+	  if (*thiscoef < 0)
+	    *thiscoef += m1;
+	  else
+	    *thiscoef += p1;
+	}
+	break;
+      }
+      if (arith_decode(cinfo, st + 1)) {	/* newly nonzero coef */
+	if (arith_decode(cinfo, entropy->fixed_bin))
+	  *thiscoef = m1;
+	else
+	  *thiscoef = p1;
+	break;
+      }
+      st += 3;
+      if (k >= cinfo->Se) {
+	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	entropy->ct = -1;			/* spectral overflow */
+	return TRUE;
+      }
+    }
+  } while (k < cinfo->Se);
+
+  return TRUE;
+}
+
+
+/*
+ * Decode one MCU's worth of arithmetic-compressed coefficients.
+ */
+
+METHODDEF(boolean)
+decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  jpeg_component_info * compptr;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl, sign, k;
+  int v, m;
+  const int * natural_order;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;	/* earlier error: decode nothing */
+
+  natural_order = cinfo->natural_order;
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
+
+    tbl = compptr->dc_tbl_no;
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.19: Decode_DC_DIFF */
+    if (arith_decode(cinfo, st) == 0)
+      entropy->dc_context[ci] = 0;
+    else {
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, st + 1);
+      st += 2; st += sign;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
+	while (arith_decode(cinfo, st)) {
+	  if ((m <<= 1) == 0x8000) {
+	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	    entropy->ct = -1;			/* magnitude overflow */
+	    return TRUE;
+	  }
+	  st += 1;
+	}
+      }
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+	entropy->dc_context[ci] = 0;		   /* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
+      else
+	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	if (arith_decode(cinfo, st)) v |= m;
+      v += 1; if (sign) v = -v;
+      entropy->last_dc_val[ci] += v;	/* v is a difference to the last DC */
+    }
+
+    (*block)[0] = (JCOEF) entropy->last_dc_val[ci];	/* stored unshifted */
+
+    /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
+
+    if (cinfo->lim_Se == 0) continue;	/* lim_Se == 0: skip AC decoding */
+    tbl = compptr->ac_tbl_no;
+    k = 0;
+
+    /* Figure F.20: Decode_AC_coefficients */
+    do {
+      st = entropy->ac_stats[tbl] + 3 * k;
+      if (arith_decode(cinfo, st)) break;	/* EOB flag */
+      for (;;) {
+	k++;
+	if (arith_decode(cinfo, st + 1)) break;
+	st += 3;
+	if (k >= cinfo->lim_Se) {
+	  WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	  entropy->ct = -1;			/* spectral overflow */
+	  return TRUE;
+	}
+      }
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, entropy->fixed_bin);
+      st += 2;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+	if (arith_decode(cinfo, st)) {
+	  m <<= 1;
+	  st = entropy->ac_stats[tbl] +
+	       (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+	  while (arith_decode(cinfo, st)) {
+	    if ((m <<= 1) == 0x8000) {
+	      WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	      entropy->ct = -1;			/* magnitude overflow */
+	      return TRUE;
+	    }
+	    st += 1;
+	  }
+	}
+      }
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	if (arith_decode(cinfo, st)) v |= m;
+      v += 1; if (sign) v = -v;
+      (*block)[natural_order[k]] = (JCOEF) v;	/* store in natural order */
+    } while (k < cinfo->lim_Se);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Initialize for an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass (j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+
+  if (cinfo->progressive_mode) {
+    /* Validate progressive scan parameters */
+    if (cinfo->Ss == 0) {
+      if (cinfo->Se != 0)	/* a scan starting at DC must be DC-only */
+	goto bad;
+    } else {
+      /* need not check Ss/Se < 0 since they came from unsigned bytes */
+      if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se)
+	goto bad;
+      /* AC scans may have only one component */
+      if (cinfo->comps_in_scan != 1)
+	goto bad;
+    }
+    if (cinfo->Ah != 0) {
+      /* Successive approximation refinement scan: must have Al = Ah-1. */
+      if (cinfo->Ah-1 != cinfo->Al)
+	goto bad;
+    }
+    if (cinfo->Al > 13) {	/* need not check for < 0 */
+      bad:			/* the checks above jump into this block */
+      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+    }
+    /* Update progression status, and verify that scan order is legal.
+     * Note that inter-scan inconsistencies are treated as warnings
+     * not fatal errors ... not clear if this is right way to behave.
+     */
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
+      int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
+      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
+	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
+	int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
+	if (cinfo->Ah != expected)
+	  WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+	coef_bit_ptr[coefi] = cinfo->Al;	/* record this scan's Al */
+      }
+    }
+    /* Select MCU decoding routine */
+    if (cinfo->Ah == 0) {
+      if (cinfo->Ss == 0)
+	entropy->pub.decode_mcu = decode_mcu_DC_first;
+      else
+	entropy->pub.decode_mcu = decode_mcu_AC_first;
+    } else {
+      if (cinfo->Ss == 0)
+	entropy->pub.decode_mcu = decode_mcu_DC_refine;
+      else
+	entropy->pub.decode_mcu = decode_mcu_AC_refine;
+    }
+  } else {
+    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
+     * This ought to be an error condition, but we make it a warning.
+     */
+    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
+	(cinfo->Se < DCTSIZE2 && cinfo->Se != cinfo->lim_Se))
+      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
+    /* Select MCU decoding routine */
+    entropy->pub.decode_mcu = decode_mcu;
+  }
+
+  /* Allocate & initialize requested statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
+      tbl = compptr->dc_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->dc_stats[tbl] == NULL)	/* first use of this table */
+	entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
+      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);	/* new or reused */
+      /* Initialize DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    if ((! cinfo->progressive_mode && cinfo->lim_Se) ||
+	(cinfo->progressive_mode && cinfo->Ss)) {
+      tbl = compptr->ac_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->ac_stats[tbl] == NULL)	/* first use of this table */
+	entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
+      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);	/* new or reused */
+    }
+  }
+
+  /* Initialize arithmetic decoding variables */
+  entropy->c = 0;
+  entropy->a = 0;
+  entropy->ct = -16;	/* force reading 2 initial bytes to fill C */
+
+  /* Initialize restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Finish up at the end of an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass (j_decompress_ptr cinfo)
+{
+  (void) cinfo;		/* intentionally empty: no end-of-scan work */
+}
+
+
+/*
+ * Module initialization routine for arithmetic entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_arith_decoder (j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr dec;
+  int tblno, ci, coefi;
+
+  /* Create the decoder object in the per-image pool and hook it up */
+  dec = (arith_entropy_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(arith_entropy_decoder));
+  cinfo->entropy = &dec->pub;
+  dec->pub.start_pass = start_pass;
+  dec->pub.finish_pass = finish_pass;
+
+  /* No statistics areas exist yet; start_pass allocates them on demand */
+  for (tblno = NUM_ARITH_TBLS - 1; tblno >= 0; tblno--)
+    dec->dc_stats[tblno] = dec->ac_stats[tblno] = NULL;
+
+  /* Table index used by the fixed-probability bin */
+  dec->fixed_bin[0] = 113;
+
+  if (! cinfo->progressive_mode)
+    return;
+
+  /* Progressive mode only: create the progression status table, one row
+   * of DCTSIZE2 entries per component, with every entry starting at -1.
+   */
+  cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     cinfo->num_components * DCTSIZE2 * SIZEOF(int));
+  for (ci = 0; ci < cinfo->num_components; ci++) {
+    for (coefi = 0; coefi < DCTSIZE2; coefi++)
+      cinfo->coef_bits[ci][coefi] = -1;
+  }
+}
diff --git a/libraries/jpeg/jdatasrc.c b/libraries/jpeg/jdatasrc.c
new file mode 100644
index 000000000..2a27cfed8
--- /dev/null
+++ b/libraries/jpeg/jdatasrc.c
@@ -0,0 +1,275 @@
+/*
+ * jdatasrc.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2015 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains decompression data source routines for the case of
+ * reading JPEG data from memory or from a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different source manager.
+ * IMPORTANT: we assume that fread() will correctly transcribe an array of
+ * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+/* Expanded data source object for stdio input */
+
+typedef struct {
+  struct jpeg_source_mgr pub;	/* public fields; cinfo->src is cast to us */
+
+  FILE * infile;		/* source stream, owned by the caller */
+  JOCTET * buffer;		/* start of buffer (INPUT_BUF_SIZE JOCTETs) */
+  boolean start_of_file;	/* TRUE until the first buffer fill is done */
+} my_source_mgr;
+
+typedef my_source_mgr * my_src_ptr;
+
+#define INPUT_BUF_SIZE  4096	/* choose an efficiently fread'able size */
+
+
+/*
+ * Initialize source --- called by jpeg_read_header
+ * before any data is actually read.
+ */
+
+METHODDEF(void)
+init_source (j_decompress_ptr cinfo)
+{
+  /* Re-arm the empty-input-file check for the image about to be read.
+   * Nothing else is touched: in particular the input buffer is not
+   * cleared, which is the behavior needed when a series of images is
+   * read from one and the same source.
+   */
+
+  ((my_src_ptr) cinfo->src)->start_of_file = TRUE;
+}
+
+METHODDEF(void)
+init_mem_source (j_decompress_ptr cinfo)
+{
+  (void) cinfo;		/* intentionally empty: nothing to set up */
+}
+
+
+/*
+ * Fill the input buffer --- called whenever buffer is emptied.
+ *
+ * In typical applications, this should read fresh data into the buffer
+ * (ignoring the current state of next_input_byte & bytes_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been reloaded.  It is not necessary to
+ * fill the buffer entirely, only to obtain at least one more byte.
+ *
+ * There is no such thing as an EOF return.  If the end of the file has been
+ * reached, the routine has a choice of ERREXIT() or inserting fake data into
+ * the buffer.  In most cases, generating a warning message and inserting a
+ * fake EOI marker is the best course of action --- this will allow the
+ * decompressor to output however much of the image is there.  However,
+ * the resulting error message is misleading if the real problem is an empty
+ * input file, so we handle that case specially.
+ *
+ * In applications that need to be able to suspend decompression due to input
+ * not being available yet, a FALSE return indicates that no more data can be
+ * obtained right now, but more may be forthcoming later.  In this situation,
+ * the decompressor will return to its caller (with an indication of the
+ * number of scanlines it has read, if any).  The application should resume
+ * decompression after it has loaded more data into the input buffer.  Note
+ * that there are substantial restrictions on the use of suspension --- see
+ * the documentation.
+ *
+ * When suspending, the decompressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_input_byte & bytes_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point must be rescanned after resumption, so move it to
+ * the front of the buffer rather than discarding it.
+ */
+
+METHODDEF(boolean)
+fill_input_buffer (j_decompress_ptr cinfo)
+{
+  my_src_ptr stream = (my_src_ptr) cinfo->src;
+  size_t got = JFREAD(stream->infile, stream->buffer, INPUT_BUF_SIZE);
+
+  if (got == 0) {
+    /* Nothing was read: fatal if no data has been seen at all yet */
+    if (stream->start_of_file)
+      ERREXIT(cinfo, JERR_INPUT_EMPTY);
+    WARNMS(cinfo, JWRN_JPEG_EOF);
+    /* Otherwise warn and hand the decoder a fake EOI marker */
+    stream->buffer[0] = (JOCTET) 0xFF;
+    stream->buffer[1] = (JOCTET) JPEG_EOI;
+    got = 2;
+  }
+
+  /* Publish the refilled buffer through the public fields */
+  stream->start_of_file = FALSE;
+  stream->pub.next_input_byte = stream->buffer;
+  stream->pub.bytes_in_buffer = got;
+
+  return TRUE;
+}
+
+METHODDEF(boolean)
+fill_mem_input_buffer (j_decompress_ptr cinfo)
+{
+  static const JOCTET fake_eoi[4] = {	/* synthetic end-of-image marker */
+    (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0
+  };
+  struct jpeg_source_mgr * src;
+
+  /* The supplied memory buffer is expected to hold the whole JPEG data,
+   * so a request for more means the data ended early.  This is reported
+   * as a warning, and a fake EOI marker is supplied in place of data.
+   */
+  WARNMS(cinfo, JWRN_JPEG_EOF);
+
+  src = cinfo->src;
+  src->next_input_byte = fake_eoi;
+  src->bytes_in_buffer = 2;
+
+  return TRUE;
+}
+
+
+/*
+ * Skip data --- used to skip over a potentially large amount of
+ * uninteresting data (such as an APPn marker).
+ *
+ * Writers of suspendable-input applications must note that skip_input_data
+ * is not granted the right to give a suspension return.  If the skip extends
+ * beyond the data currently in the buffer, the buffer can be marked empty so
+ * that the next read will cause a fill_input_buffer call that can suspend.
+ * Arranging for additional bytes to be discarded before reloading the input
+ * buffer is the application writer's problem.
+ */
+
+METHODDEF(void)
+skip_input_data (j_decompress_ptr cinfo, long num_bytes)
+{
+  struct jpeg_source_mgr * src = cinfo->src;
+  long remaining = num_bytes;
+
+  if (remaining <= 0)
+    return;			/* zero or negative skips are ignored */
+
+  /* Simple approach: discard whole buffers until the skip ends inside the
+   * current one.  fseek() is not used because it doesn't work on pipes.
+   * The refill result is ignored on the assumption that fill_input_buffer
+   * never returns FALSE, so suspension is not handled here.
+   */
+  while ((long) src->bytes_in_buffer < remaining) {
+    remaining -= (long) src->bytes_in_buffer;
+    (void) (*src->fill_input_buffer) (cinfo);
+  }
+  src->bytes_in_buffer -= (size_t) remaining;
+  src->next_input_byte += (size_t) remaining;
+}
+
+
+/*
+ * An additional method that can be provided by data source modules is the
+ * resync_to_restart method for error recovery in the presence of RST markers.
+ * For the moment, this source module just uses the default resync method
+ * provided by the JPEG library.  That method assumes that no backtracking
+ * is possible.
+ */
+
+
+/*
+ * Terminate source --- called by jpeg_finish_decompress
+ * after all data has been read.  Often a no-op.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_source (j_decompress_ptr cinfo)
+{
+  (void) cinfo;		/* intentionally empty: nothing to clean up */
+}
+
+
+/*
+ * Prepare for input from a stdio stream.
+ * The caller must have already opened the stream, and is responsible
+ * for closing it after finishing decompression.
+ */
+
+GLOBAL(void)
+jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile)
+{
+  my_src_ptr stream = (my_src_ptr) cinfo->src;
+
+  /* On first use with this JPEG object, create the source manager and its
+   * input buffer in the permanent pool.  Keeping both alive across images
+   * lets a series of JPEG images be read from one file after a single
+   * jpeg_stdio_src call; throwing the buffer away between images would
+   * likely lose the beginning of the next one.  The price is that mixing
+   * this manager with a different one on the same object is unsafe.
+   */
+  if (stream == NULL) {
+    stream = (my_src_ptr) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_source_mgr));
+    cinfo->src = (struct jpeg_source_mgr *) stream;
+    stream->buffer = (JOCTET *) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_PERMANENT, INPUT_BUF_SIZE * SIZEOF(JOCTET));
+  }
+
+  /* Stream to read from; an empty buffer forces a fill on the first read */
+  stream->infile = infile;
+  stream->pub.bytes_in_buffer = 0;
+  stream->pub.next_input_byte = NULL;
+
+  /* Method table; restart resynchronization uses the library default */
+  stream->pub.init_source = init_source;
+  stream->pub.fill_input_buffer = fill_input_buffer;
+  stream->pub.skip_input_data = skip_input_data;
+  stream->pub.resync_to_restart = jpeg_resync_to_restart;
+  stream->pub.term_source = term_source;
+}
+
+
+/*
+ * Prepare for input from a supplied memory buffer.
+ * The buffer must contain the whole JPEG data.
+ */
+
+GLOBAL(void)
+jpeg_mem_src (j_decompress_ptr cinfo,
+	      const unsigned char * inbuffer, unsigned long insize)
+{
+  struct jpeg_source_mgr * mgr;
+
+  /* A missing or zero-length buffer is a fatal error */
+  if (insize == 0 || inbuffer == NULL)
+    ERREXIT(cinfo, JERR_INPUT_EMPTY);
+
+  /* Created once per JPEG object, in the permanent pool, then reused */
+  mgr = cinfo->src;
+  if (mgr == NULL) {
+    mgr = (struct jpeg_source_mgr *) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(struct jpeg_source_mgr));
+    cinfo->src = mgr;
+  }
+
+  /* The caller's memory serves directly as the input buffer */
+  mgr->next_input_byte = (const JOCTET *) inbuffer;
+  mgr->bytes_in_buffer = (size_t) insize;
+  mgr->init_source = init_mem_source;
+  mgr->fill_input_buffer = fill_mem_input_buffer;
+  mgr->skip_input_data = skip_input_data;
+  mgr->resync_to_restart = jpeg_resync_to_restart;
+  mgr->term_source = term_source;
+}
diff --git a/libraries/jpeg/jdcoefct.c b/libraries/jpeg/jdcoefct.c
new file mode 100644
index 000000000..ed02fc378
--- /dev/null
+++ b/libraries/jpeg/jdcoefct.c
@@ -0,0 +1,741 @@
+/*
+ * jdcoefct.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 2002-2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the coefficient buffer controller for decompression.
+ * This controller is the top level of the JPEG decompressor proper.
+ * The coefficient buffer lies between entropy decoding and inverse-DCT steps.
+ *
+ * In buffered-image mode, this controller is the interface between
+ * input-oriented processing and output-oriented processing.
+ * Also, the input side (only) is used when reading a file for transcoding.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+/* Block smoothing is only applicable for progressive JPEG, so: */
+#ifndef D_PROGRESSIVE_SUPPORTED
+#undef BLOCK_SMOOTHING_SUPPORTED
+#endif
+
+/* Private buffer controller object: public interface plus input-side position counters and coefficient storage */
+
+typedef struct {
+  struct jpeg_d_coef_controller pub; /* public fields; first member, the code casts between this struct and the public type */
+
+  /* These variables keep track of the current location of the input side. */
+  /* cinfo->input_iMCU_row is also used for this. */
+  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* The output side's location is represented by cinfo->output_iMCU_row. */
+
+  /* In single-pass modes, it's sufficient to buffer just one MCU.
+   * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks,
+   * and let the entropy decoder write into that workspace each time.
+   * (On 80x86, the workspace is FAR even though it's not really very big;
+   * this is to keep the module interfaces unchanged when a large coefficient
+   * buffer is necessary.)
+   * In multi-pass modes, this array points to the current MCU's blocks
+   * within the virtual arrays; it is used only by the input side.
+   */
+  JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* In multi-pass modes, we need a virtual block array for each component. */
+  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
+#endif
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  /* When doing block smoothing, we latch coefficient Al values here */
+  int * coef_bits_latch; /* SAVED_COEFS ints per component; NULL until smoothing_ok allocates it */
+#define SAVED_COEFS  6		/* slots reserved for coef_bits[0..5]; smoothing_ok copies entries 1..5 only */
+#endif
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr; /* obtained by casting cinfo->coef */
+
+/* Forward declarations (start_output_pass refers to the multi-pass routines before they are defined) */
+METHODDEF(int) decompress_onepass
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+METHODDEF(int) decompress_data
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#endif
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+LOCAL(boolean) smoothing_ok JPP((j_decompress_ptr cinfo));
+METHODDEF(int) decompress_smooth_data
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#endif
+
+
+LOCAL(void)
+start_iMCU_row (j_decompress_ptr cinfo)
+/* Reset the input-side counters (MCU_ctr, MCU_vert_offset) for a new iMCU row and recompute MCU_rows_per_iMCU_row */
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1; /* interleaved: more than one component in the scan */
+  } else {
+    if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1)) /* not yet the last iMCU row */
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else /* last iMCU row: use the component's last_row_height instead */
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->MCU_ctr = 0; /* restart at the first MCU column */
+  coef->MCU_vert_offset = 0; /* restart at the first MCU row of the iMCU row */
+}
+
+
+/*
+ * Initialize for an input processing pass: restart the input side at iMCU row 0.
+ */
+
+METHODDEF(void)
+start_input_pass (j_decompress_ptr cinfo)
+{
+  cinfo->input_iMCU_row = 0;
+  start_iMCU_row(cinfo); /* reset the within-row counters for that row */
+}
+
+
+/*
+ * Initialize for an output processing pass: pick the output method (multi-pass only) and rewind output_iMCU_row.
+ */
+
+METHODDEF(void)
+start_output_pass (j_decompress_ptr cinfo)
+{
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* If multipass, check to see whether to use block smoothing on this pass */
+  if (coef->pub.coef_arrays != NULL) { /* NULL is the single-pass flag set by jinit_d_coef_controller */
+    if (cinfo->do_block_smoothing && smoothing_ok(cinfo)) /* smoothing_ok also latches coef_bits */
+      coef->pub.decompress_data = decompress_smooth_data;
+    else
+      coef->pub.decompress_data = decompress_data;
+  }
+#endif
+  cinfo->output_iMCU_row = 0; /* output restarts at the top of the image */
+}
+
+
+/*
+ * Decompress and return some data in the single-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Input and output must run in lockstep since we have only a one-MCU buffer.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ * On JPEG_SUSPENDED the MCU position is saved so the next call resumes there.
+ * NB: output_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(int)
+decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, ci, xindex, yindex, yoffset, useful_width;
+  JSAMPARRAY output_ptr;
+  JDIMENSION start_col, output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+
+  /* Loop to process as much as one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
+	 MCU_col_num++) {
+      /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
+      if (cinfo->lim_Se)	/* can bypass in DC only case */
+	FMEMZERO((void FAR *) coef->MCU_buffer[0],
+		 (size_t) (cinfo->blocks_in_MCU * SIZEOF(JBLOCK)));
+      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return JPEG_SUSPENDED;
+      }
+      /* Determine where data should go in output_buf and do the IDCT thing.
+       * We skip dummy blocks at the right and bottom edges (but blkn gets
+       * incremented past them!).  Note the inner loop relies on having
+       * allocated the MCU_buffer[] blocks sequentially.
+       */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	/* Don't bother to IDCT an uninteresting component. */
+	if (! compptr->component_needed) {
+	  blkn += compptr->MCU_blocks; /* still step over its blocks in MCU_buffer */
+	  continue;
+	}
+	inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
+	useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						    : compptr->last_col_width;
+	output_ptr = output_buf[compptr->component_index] +
+	  yoffset * compptr->DCT_v_scaled_size;
+	start_col = MCU_col_num * compptr->MCU_sample_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (cinfo->input_iMCU_row < last_iMCU_row ||
+	      yoffset+yindex < compptr->last_row_height) { /* skip dummy block rows at the bottom edge */
+	    output_col = start_col;
+	    for (xindex = 0; xindex < useful_width; xindex++) {
+	      (*inverse_DCT) (cinfo, compptr,
+			      (JCOEFPTR) coef->MCU_buffer[blkn+xindex],
+			      output_ptr, output_col);
+	      output_col += compptr->DCT_h_scaled_size;
+	    }
+	  }
+	  blkn += compptr->MCU_width; /* full width, so dummy blocks are stepped over too */
+	  output_ptr += compptr->DCT_v_scaled_size;
+	}
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  cinfo->output_iMCU_row++; /* output advances together with input in this mode */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Dummy consume-input routine for single-pass operation (installed together with decompress_onepass, which reads input itself).
+ */
+
+METHODDEF(int)
+dummy_consume_data (j_decompress_ptr cinfo)
+{
+  return JPEG_SUSPENDED;	/* Always indicate nothing was done */
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Consume input data and store it in the full-image coefficient buffer.
+ * We read as much as one fully interleaved MCU row ("iMCU" row) per call,
+ * ie, v_samp_factor block rows for each component in the scan.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED; on suspension the MCU position is saved for the next call.
+ */
+
+METHODDEF(int)
+consume_data (j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int blkn, ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       cinfo->input_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, TRUE); /* NOTE(review): TRUE presumably requests writable access -- confirm against the memory manager */
+    /* Note: entropy decoder expects buffer to be zeroed,
+     * but this is handled automatically by the memory manager
+     * because we requested a pre-zeroed array.
+     */
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+	    coef->MCU_buffer[blkn++] = buffer_ptr++; /* decode directly into the virtual array */
+	  }
+	}
+      }
+      /* Try to fetch the MCU. */
+      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return JPEG_SUSPENDED;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) { /* only the input row counter moves here */
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Decompress and return some data in the multi-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ * JPEG_SUSPENDED is returned, with nothing output, if the needed input cannot be consumed.
+ * NB: output_buf contains a plane for each component in image.
+ */
+
+METHODDEF(int)
+decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION block_num;
+  int ci, block_row, block_rows;
+  JBLOCKARRAY buffer;
+  JBLOCKROW buffer_ptr;
+  JSAMPARRAY output_ptr;
+  JDIMENSION output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number < cinfo->output_scan_number ||
+	 (cinfo->input_scan_number == cinfo->output_scan_number &&
+	  cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
+    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to IDCT an uninteresting component. */
+    if (! compptr->component_needed)
+      continue;
+    /* Align the virtual buffer for this component. */
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[ci],
+       cinfo->output_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (cinfo->output_iMCU_row < last_iMCU_row)
+      block_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor; /* exact multiple: the last row is full */
+    }
+    inverse_DCT = cinfo->idct->inverse_DCT[ci];
+    output_ptr = output_buf[ci];
+    /* Loop over all DCT blocks to be processed. */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      buffer_ptr = buffer[block_row];
+      output_col = 0;
+      for (block_num = 0; block_num < compptr->width_in_blocks; block_num++) {
+	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
+			output_ptr, output_col);
+	buffer_ptr++;
+	output_col += compptr->DCT_h_scaled_size;
+      }
+      output_ptr += compptr->DCT_v_scaled_size;
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows) /* only the output row counter moves here */
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+
+/*
+ * This code applies interblock smoothing as described by section K.8
+ * of the JPEG standard: the first 5 AC coefficients are estimated from
+ * the DC values of a DCT block and its 8 neighboring blocks.
+ * We apply smoothing only for progressive JPEG decoding, and only if
+ * the coefficients it can estimate are not yet known to full precision.
+ */
+
+/* Natural-order array positions of the first 5 zigzag-order AC coefficients (the DC term at index 0 is handled separately) */
+#define Q01_POS  1
+#define Q10_POS  8
+#define Q20_POS  16
+#define Q11_POS  9
+#define Q02_POS  2
+
+/*
+ * Determine whether block smoothing is applicable and safe.
+ * We also latch the current states of the coef_bits[] entries for the
+ * AC coefficients; otherwise, if the input side of the decompressor
+ * advances into a new scan, we might think the coefficients are known
+ * more accurately than they really are.  Returns TRUE only if some latched AC entry is nonzero.
+ */
+
+LOCAL(boolean)
+smoothing_ok (j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  boolean smoothing_useful = FALSE;
+  int ci, coefi;
+  jpeg_component_info *compptr;
+  JQUANT_TBL * qtable;
+  int * coef_bits;
+  int * coef_bits_latch;
+
+  if (! cinfo->progressive_mode || cinfo->coef_bits == NULL) /* needs progressive mode and a coef_bits table */
+    return FALSE;
+
+  /* Allocate latch area if not already done */
+  if (coef->coef_bits_latch == NULL)
+    coef->coef_bits_latch = (int *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  cinfo->num_components *
+				  (SAVED_COEFS * SIZEOF(int)));
+  coef_bits_latch = coef->coef_bits_latch;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* All components' quantization values must already be latched. */
+    if ((qtable = compptr->quant_table) == NULL)
+      return FALSE;
+    /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
+    if (qtable->quantval[0] == 0 ||
+	qtable->quantval[Q01_POS] == 0 ||
+	qtable->quantval[Q10_POS] == 0 ||
+	qtable->quantval[Q20_POS] == 0 ||
+	qtable->quantval[Q11_POS] == 0 ||
+	qtable->quantval[Q02_POS] == 0)
+      return FALSE;
+    /* DC values must be at least partly known for all components. */
+    coef_bits = cinfo->coef_bits[ci];
+    if (coef_bits[0] < 0)
+      return FALSE;
+    /* Block smoothing is helpful if some AC coefficients remain inaccurate. */
+    for (coefi = 1; coefi <= 5; coefi++) { /* latch slot 0 of each component is never written */
+      coef_bits_latch[coefi] = coef_bits[coefi];
+      if (coef_bits[coefi] != 0)
+	smoothing_useful = TRUE;
+    }
+    coef_bits_latch += SAVED_COEFS; /* step to the next component's latch slots */
+  }
+
+  return smoothing_useful;
+}
+
+
+/*
+ * Variant of decompress_data for use when doing block smoothing: still-zero low-order AC coefficients are estimated from neighboring DC values before the IDCT.
+ */
+
+METHODDEF(int)
+decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION block_num, last_block_column;
+  int ci, block_row, block_rows, access_rows;
+  JBLOCKARRAY buffer;
+  JBLOCKROW buffer_ptr, prev_block_row, next_block_row;
+  JSAMPARRAY output_ptr;
+  JDIMENSION output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+  boolean first_row, last_row;
+  JBLOCK workspace; /* scratch copy of the current block; estimates are written here, not into the stored coefficients */
+  int *coef_bits;
+  JQUANT_TBL *quanttbl;
+  INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
+  int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9; /* 3x3 DC neighborhood: 1-3 previous row, 4-6 current row, 7-9 next row */
+  int Al, pred;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+	 ! cinfo->inputctl->eoi_reached) {
+    if (cinfo->input_scan_number == cinfo->output_scan_number) {
+      /* If input is working on current scan, we ordinarily want it to
+       * have completed the current row.  But if input scan is DC,
+       * we want it to keep one row ahead so that next block row's DC
+       * values are up to date.
+       */
+      JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
+      if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta)
+	break;
+    }
+    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to IDCT an uninteresting component. */
+    if (! compptr->component_needed)
+      continue;
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (cinfo->output_iMCU_row < last_iMCU_row) {
+      block_rows = compptr->v_samp_factor;
+      access_rows = block_rows * 2; /* this and next iMCU row */
+      last_row = FALSE;
+    } else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+      access_rows = block_rows; /* this iMCU row only */
+      last_row = TRUE;
+    }
+    /* Align the virtual buffer for this component. */
+    if (cinfo->output_iMCU_row > 0) {
+      access_rows += compptr->v_samp_factor; /* prior iMCU row too */
+      buffer = (*cinfo->mem->access_virt_barray)
+	((j_common_ptr) cinfo, coef->whole_image[ci],
+	 (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
+	 (JDIMENSION) access_rows, FALSE);
+      buffer += compptr->v_samp_factor;	/* point to current iMCU row */
+      first_row = FALSE;
+    } else {
+      buffer = (*cinfo->mem->access_virt_barray)
+	((j_common_ptr) cinfo, coef->whole_image[ci],
+	 (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE);
+      first_row = TRUE;
+    }
+    /* Fetch component-dependent info */
+    coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS); /* this component's values latched by smoothing_ok */
+    quanttbl = compptr->quant_table;
+    Q00 = quanttbl->quantval[0];
+    Q01 = quanttbl->quantval[Q01_POS];
+    Q10 = quanttbl->quantval[Q10_POS];
+    Q20 = quanttbl->quantval[Q20_POS];
+    Q11 = quanttbl->quantval[Q11_POS];
+    Q02 = quanttbl->quantval[Q02_POS];
+    inverse_DCT = cinfo->idct->inverse_DCT[ci];
+    output_ptr = output_buf[ci];
+    /* Loop over all DCT blocks to be processed. */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      buffer_ptr = buffer[block_row];
+      if (first_row && block_row == 0)
+	prev_block_row = buffer_ptr; /* top of image: the current row stands in for the missing one */
+      else
+	prev_block_row = buffer[block_row-1]; /* index -1 is valid here: buffer was advanced past the prior iMCU row */
+      if (last_row && block_row == block_rows-1)
+	next_block_row = buffer_ptr; /* bottom of image: the current row stands in for the missing one */
+      else
+	next_block_row = buffer[block_row+1];
+      /* We fetch the surrounding DC values using a sliding-register approach.
+       * Initialize all nine here so as to do the right thing on narrow pics.
+       */
+      DC1 = DC2 = DC3 = (int) prev_block_row[0][0];
+      DC4 = DC5 = DC6 = (int) buffer_ptr[0][0];
+      DC7 = DC8 = DC9 = (int) next_block_row[0][0];
+      output_col = 0;
+      last_block_column = compptr->width_in_blocks - 1;
+      for (block_num = 0; block_num <= last_block_column; block_num++) {
+	/* Fetch current DCT block into workspace so we can modify it. */
+	jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
+	/* Update DC values */
+	if (block_num < last_block_column) { /* at the last column the right-hand values stay as they are */
+	  DC3 = (int) prev_block_row[1][0];
+	  DC6 = (int) buffer_ptr[1][0];
+	  DC9 = (int) next_block_row[1][0];
+	}
+	/* Compute coefficient estimates per K.8.
+	 * An estimate is applied only if coefficient is still zero,
+	 * and is not known to be fully accurate.
+	 */
+	/* AC01 */
+	if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
+	  num = 36 * Q00 * (DC4 - DC6);
+	  if (num >= 0) {
+	    pred = (int) (((Q01<<7) + num) / (Q01<<8));
+	    if (Al > 0 && pred >= (1<<Al)) /* when Al is positive, limit the magnitude to (1<<Al)-1 */
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q01<<7) - num) / (Q01<<8)); /* work on the magnitude, restore the sign below */
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[1] = (JCOEF) pred;
+	}
+	/* AC10 */
+	if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
+	  num = 36 * Q00 * (DC2 - DC8);
+	  if (num >= 0) {
+	    pred = (int) (((Q10<<7) + num) / (Q10<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q10<<7) - num) / (Q10<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[8] = (JCOEF) pred;
+	}
+	/* AC20 */
+	if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
+	  num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
+	  if (num >= 0) {
+	    pred = (int) (((Q20<<7) + num) / (Q20<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q20<<7) - num) / (Q20<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[16] = (JCOEF) pred;
+	}
+	/* AC11 */
+	if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
+	  num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
+	  if (num >= 0) {
+	    pred = (int) (((Q11<<7) + num) / (Q11<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q11<<7) - num) / (Q11<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[9] = (JCOEF) pred;
+	}
+	/* AC02 */
+	if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
+	  num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
+	  if (num >= 0) {
+	    pred = (int) (((Q02<<7) + num) / (Q02<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q02<<7) - num) / (Q02<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[2] = (JCOEF) pred;
+	}
+	/* OK, do the IDCT */
+	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
+			output_ptr, output_col);
+	/* Advance for next column */
+	DC1 = DC2; DC2 = DC3;
+	DC4 = DC5; DC5 = DC6;
+	DC7 = DC8; DC8 = DC9;
+	buffer_ptr++, prev_block_row++, next_block_row++;
+	output_col += compptr->DCT_h_scaled_size;
+      }
+      output_ptr += compptr->DCT_v_scaled_size;
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* BLOCK_SMOOTHING_SUPPORTED */
+
+
+/*
+ * Initialize coefficient buffer controller; need_full_buffer selects full-image virtual arrays versus a single-MCU workspace.
+ */
+
+GLOBAL(void)
+jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_coef_ptr coef;
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_coef_controller));
+  cinfo->coef = (struct jpeg_d_coef_controller *) coef;
+  coef->pub.start_input_pass = start_input_pass;
+  coef->pub.start_output_pass = start_output_pass;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  coef->coef_bits_latch = NULL; /* smoothing_ok allocates it on first use */
+#endif
+
+  /* Create the coefficient buffer. */
+  if (need_full_buffer) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+    /* Allocate a full-image virtual array for each component, */
+    /* padded to a multiple of samp_factor DCT blocks in each direction. */
+    /* Note we ask for a pre-zeroed array. */
+    int ci, access_rows;
+    jpeg_component_info *compptr;
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      access_rows = compptr->v_samp_factor;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+      /* If block smoothing could be used, need a bigger window */
+      if (cinfo->progressive_mode)
+	access_rows *= 3; /* decompress_smooth_data may access the prior, current and next iMCU rows together */
+#endif
+      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
+	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+				(long) compptr->h_samp_factor),
+	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				(long) compptr->v_samp_factor),
+	 (JDIMENSION) access_rows);
+    }
+    coef->pub.consume_data = consume_data;
+    coef->pub.decompress_data = decompress_data; /* start_output_pass may swap in decompress_smooth_data */
+    coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* We only need a single-MCU buffer. */
+    JBLOCKROW buffer;
+    int i;
+
+    buffer = (JBLOCKROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+    for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
+      coef->MCU_buffer[i] = buffer + i; /* consecutive blocks; decompress_onepass relies on this layout */
+    }
+    if (cinfo->lim_Se == 0)	/* DC only case: want to bypass later */
+      FMEMZERO((void FAR *) buffer,
+	       (size_t) (D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK)));
+    coef->pub.consume_data = dummy_consume_data;
+    coef->pub.decompress_data = decompress_onepass;
+    coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
+  }
+}
diff --git a/libraries/jpeg/jdcolor.c b/libraries/jpeg/jdcolor.c
new file mode 100644
index 000000000..0316354da
--- /dev/null
+++ b/libraries/jpeg/jdcolor.c
@@ -0,0 +1,731 @@
+/*
+ * jdcolor.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2011-2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains output colorspace conversion routines.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+#if RANGE_BITS < 2
+  /* Deliberate syntax err */
+  Sorry, this code requires 2 or more range extension bits.
+#endif
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_deconverter pub; /* public fields; first member, so &pub casts back to this struct */
+
+  /* Private state for YCbCr->RGB and BG_YCC->RGB conversion (MAXJSAMPLE+1 entries per table) */
+  int * Cr_r_tab;		/* => table for Cr to R conversion, rounded to int */
+  int * Cb_b_tab;		/* => table for Cb to B conversion, rounded to int */
+  INT32 * Cr_g_tab;		/* => table for Cr to G conversion, scaled by 2^SCALEBITS */
+  INT32 * Cb_g_tab;		/* => table for Cb to G conversion, scaled, ONE_HALF included */
+
+  /* Private state for RGB->Y conversion */
+  INT32 * rgb_y_tab;		/* => TABLE_SIZE entries: R, G and B sections in that order */
+} my_color_deconverter;
+
+typedef my_color_deconverter * my_cconvert_ptr;	/* what cinfo->cconvert is cast to in this file */
+
+
+/***************  YCbCr -> RGB conversion: most common case **************/
+/*************** BG_YCC -> RGB conversion: less common case **************/
+/***************    RGB -> Y   conversion: less common case **************/
+
+/*
+ * YCbCr is defined per Recommendation ITU-R BT.601-7 (03/2011),
+ * previously known as Recommendation CCIR 601-1, except that Cb and Cr
+ * are normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * sRGB (standard RGB color space) is defined per IEC 61966-2-1:1999.
+ * sYCC (standard luma-chroma-chroma color space with extended gamut)
+ * is defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex F.
+ * bg-sRGB and bg-sYCC (big gamut standard color spaces)
+ * are defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex G.
+ * Note that the derived conversion coefficients given in some of these
+ * documents are imprecise.  The general conversion equations are
+ *
+ *	R = Y + K * (1 - Kr) * Cr
+ *	G = Y - K * (Kb * (1 - Kb) * Cb + Kr * (1 - Kr) * Cr) / (1 - Kr - Kb)
+ *	B = Y + K * (1 - Kb) * Cb
+ *
+ *	Y = Kr * R + (1 - Kr - Kb) * G + Kb * B
+ *
+ * With Kr = 0.299 and Kb = 0.114 (derived according to SMPTE RP 177-1993
+ * from the 1953 FCC NTSC primaries and CIE Illuminant C), K = 2 for sYCC,
+ * the conversion equations to be implemented are therefore
+ *
+ *	R = Y + 1.402 * Cr
+ *	G = Y - 0.344136286 * Cb - 0.714136286 * Cr
+ *	B = Y + 1.772 * Cb
+ *
+ *	Y = 0.299 * R + 0.587 * G + 0.114 * B
+ *
+ * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
+ * For bg-sYCC, with K = 4, the equations are
+ *
+ *	R = Y + 2.804 * Cr
+ *	G = Y - 0.688272572 * Cb - 1.428272572 * Cr
+ *	B = Y + 3.544 * Cb
+ *
+ * To avoid floating-point arithmetic, we represent the fractional constants
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
+ * Notice that Y, being an integral input, does not contribute any fraction
+ * so it need not participate in the rounding.
+ *
+ * For even more speed, we avoid doing any multiplications in the inner loop
+ * by precalculating the constants times Cb and Cr for all possible values.
+ * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
+ * for 9-bit to 12-bit samples it is still acceptable.  It's not very
+ * reasonable for 16-bit samples, but if you want lossless storage you
+ * shouldn't be changing colorspace anyway.
+ * The Cr=>R and Cb=>B values can be rounded to integers in advance; the
+ * values for the G calculation are left scaled up, since we must add them
+ * together before rounding.
+ */
+
+#define SCALEBITS	16	/* fraction bits; speediest right-shift on some machines */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))	/* 0.5 in SCALEBITS fixed point */
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))	/* x >= 0 as rounded fixed point */
+
+/* We allocate one big table for RGB->Y conversion and divide it up into
+ * three parts, instead of doing three alloc_small requests.  This lets us
+ * use a single table base address, which can be held in a register in the
+ * inner loops on many machines (more than can hold all three addresses,
+ * anyway).
+ */
+
+#define R_Y_OFF		0			/* offset to R => Y section */
+#define G_Y_OFF		(1*(MAXJSAMPLE+1))	/* offset to G => Y section */
+#define B_Y_OFF		(2*(MAXJSAMPLE+1))	/* offset to B => Y section */
+#define TABLE_SIZE	(3*(MAXJSAMPLE+1))	/* entries in the combined table */
+
+
+/*
+ * Initialize tables for YCbCr->RGB and BG_YCC->RGB colorspace conversion.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table (j_decompress_ptr cinfo)
+/* Normal case, sYCC (K = 2) */
+{
+  my_cconvert_ptr conv = (my_cconvert_ptr) cinfo->cconvert;
+  j_common_ptr owner = (j_common_ptr) cinfo;
+  int *r_tab, *b_tab;
+  INT32 *crg_tab, *cbg_tab;
+  INT32 chroma;
+  int sample;
+  SHIFT_TEMPS
+
+  /* Four tables of MAXJSAMPLE+1 entries each, all requested from JPOOL_IMAGE. */
+  conv->Cr_r_tab = r_tab = (int *) (*cinfo->mem->alloc_small)
+    (owner, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  conv->Cb_b_tab = b_tab = (int *) (*cinfo->mem->alloc_small)
+    (owner, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  conv->Cr_g_tab = crg_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    (owner, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+  conv->Cb_g_tab = cbg_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    (owner, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  /* sample is the raw Cb or Cr input value, 0..MAXJSAMPLE; chroma is the
+   * same value re-centered on zero, i.e. sample - CENTERJSAMPLE.
+   */
+  for (sample = 0; sample <= MAXJSAMPLE; sample++) {
+    chroma = (INT32) sample - CENTERJSAMPLE;
+    /* Cr=>R and Cb=>B: nearest int to 1.402 * chroma and 1.772 * chroma */
+    r_tab[sample] = (int) RIGHT_SHIFT(FIX(1.402) * chroma + ONE_HALF, SCALEBITS);
+    b_tab[sample] = (int) RIGHT_SHIFT(FIX(1.772) * chroma + ONE_HALF, SCALEBITS);
+    /* Cr=>G and Cb=>G: -0.714136286 and -0.344136286 times chroma, left
+     * scaled up by 2^SCALEBITS.  ONE_HALF is folded into the Cb entry so
+     * that the per-pixel loop need not add the rounding term itself.
+     */
+    crg_tab[sample] = (- FIX(0.714136286)) * chroma;
+    cbg_tab[sample] = (- FIX(0.344136286)) * chroma + ONE_HALF;
+  }
+}
+
+
+LOCAL(void)
+build_bg_ycc_rgb_table (j_decompress_ptr cinfo)
+/* Wide gamut case, bg-sYCC (K = 4) */
+{
+  my_cconvert_ptr conv = (my_cconvert_ptr) cinfo->cconvert;
+  j_common_ptr owner = (j_common_ptr) cinfo;
+  int *r_tab, *b_tab;
+  INT32 *crg_tab, *cbg_tab;
+  INT32 chroma;
+  int sample;
+  SHIFT_TEMPS
+
+  /* Four tables of MAXJSAMPLE+1 entries each, all requested from JPOOL_IMAGE. */
+  conv->Cr_r_tab = r_tab = (int *) (*cinfo->mem->alloc_small)
+    (owner, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  conv->Cb_b_tab = b_tab = (int *) (*cinfo->mem->alloc_small)
+    (owner, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  conv->Cr_g_tab = crg_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    (owner, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+  conv->Cb_g_tab = cbg_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    (owner, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  /* sample is the raw Cb or Cr input value, 0..MAXJSAMPLE; chroma is the
+   * same value re-centered on zero, i.e. sample - CENTERJSAMPLE.
+   */
+  for (sample = 0; sample <= MAXJSAMPLE; sample++) {
+    chroma = (INT32) sample - CENTERJSAMPLE;
+    /* Cr=>R and Cb=>B: nearest int to 2.804 * chroma and 3.544 * chroma */
+    r_tab[sample] = (int) RIGHT_SHIFT(FIX(2.804) * chroma + ONE_HALF, SCALEBITS);
+    b_tab[sample] = (int) RIGHT_SHIFT(FIX(3.544) * chroma + ONE_HALF, SCALEBITS);
+    /* Cr=>G and Cb=>G: -1.428272572 and -0.688272572 times chroma, left
+     * scaled up by 2^SCALEBITS.  ONE_HALF is folded into the Cb entry so
+     * that the per-pixel loop need not add the rounding term itself.
+     */
+    crg_tab[sample] = (- FIX(1.428272572)) * chroma;
+    cbg_tab[sample] = (- FIX(0.688272572)) * chroma + ONE_HALF;
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the output colorspace.
+ *
+ * Note that we change from noninterleaved, one-plane-per-component format
+ * to interleaved-pixel format.  The output buffer is therefore three times
+ * as wide as the input buffer.
+ * A starting row offset is provided only for the input buffer.  The caller
+ * can easily adjust the passed output_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+METHODDEF(void)
+ycc_rgb_convert (j_decompress_ptr cinfo,
+		 JSAMPIMAGE input_buf, JDIMENSION input_row,
+		 JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr conv = (my_cconvert_ptr) cinfo->cconvert;
+  /* Table bases and the range-limit table go into locals for the inner loop. */
+  register JSAMPLE * limit = cinfo->sample_range_limit;
+  register int * r_tab = conv->Cr_r_tab;
+  register int * b_tab = conv->Cb_b_tab;
+  register INT32 * crg_tab = conv->Cr_g_tab;
+  register INT32 * cbg_tab = conv->Cb_g_tab;
+  register JSAMPROW y_row, cb_row, cr_row;
+  register JSAMPROW pixel;
+  register int luma, cb, cr, g_offset;
+  register JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  SHIFT_TEMPS
+
+  /* One pass per row: planar Y/Cb/Cr in, interleaved RGB_PIXELSIZE-sample
+   * pixels out. */
+  for (; num_rows > 0; num_rows--, input_row++) {
+    y_row  = input_buf[0][input_row];
+    cb_row = input_buf[1][input_row];
+    cr_row = input_buf[2][input_row];
+    pixel = *output_buf++;
+    for (col = 0; col < width; col++, pixel += RGB_PIXELSIZE) {
+      luma = GETJSAMPLE(y_row[col]);
+      cb   = GETJSAMPLE(cb_row[col]);
+      cr   = GETJSAMPLE(cr_row[col]);
+      /* Both G entries are still scaled up: add first, then shift once. */
+      g_offset = (int) RIGHT_SHIFT(cbg_tab[cb] + crg_tab[cr], SCALEBITS);
+      /* Range-limiting is essential due to noise introduced by DCT losses,
+       * for extended gamut (sYCC) and wide gamut (bg-sYCC) encodings.
+       */
+      pixel[RGB_RED]   = limit[luma + r_tab[cr]];
+      pixel[RGB_GREEN] = limit[luma + g_offset];
+      pixel[RGB_BLUE]  = limit[luma + b_tab[cb]];
+    }
+  }
+}
+
+
+/**************** Cases other than YCC -> RGB ****************/
+
+
+/*
+ * Initialize for RGB->grayscale colorspace conversion.
+ */
+
+LOCAL(void)
+build_rgb_y_table (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr conv = (my_cconvert_ptr) cinfo->cconvert;
+  INT32 * table;
+  INT32 level;
+
+  /* A single allocation holds the R, G and B sections back to back. */
+  conv->rgb_y_tab = table = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (TABLE_SIZE * SIZEOF(INT32)));
+
+  /* Weights are scaled by 2^SCALEBITS; only the B section adds ONE_HALF. */
+  for (level = 0; level <= MAXJSAMPLE; level++) {
+    table[R_Y_OFF + level] = FIX(0.299) * level;
+    table[G_Y_OFF + level] = FIX(0.587) * level;
+    table[B_Y_OFF + level] = FIX(0.114) * level + ONE_HALF;
+  }
+}
+
+
+/*
+ * Convert RGB to grayscale.
+ */
+
+METHODDEF(void)
+rgb_gray_convert (j_decompress_ptr cinfo,
+		  JSAMPIMAGE input_buf, JDIMENSION input_row,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr conv = (my_cconvert_ptr) cinfo->cconvert;
+  register INT32 * ytab = conv->rgb_y_tab;
+  register JSAMPROW r_row, g_row, b_row;
+  register JSAMPROW gray_row;
+  register INT32 luma;
+  register int r, g, b;
+  register JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (; num_rows > 0; num_rows--, input_row++) {
+    r_row = input_buf[0][input_row];
+    g_row = input_buf[1][input_row];
+    b_row = input_buf[2][input_row];
+    gray_row = *output_buf++;
+    for (col = 0; col < width; col++) {
+      r = GETJSAMPLE(r_row[col]);
+      g = GETJSAMPLE(g_row[col]);
+      b = GETJSAMPLE(b_row[col]);
+      /* Y: sum the three table contributions (fixed point, SCALEBITS
+       * fraction bits), then shift back down to a sample value. */
+      luma = ytab[r+R_Y_OFF] + ytab[g+G_Y_OFF] + ytab[b+B_Y_OFF];
+      gray_row[col] = (JSAMPLE) (luma >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * [R-G,G,B-G] to [R,G,B] conversion with modulo calculation
+ * (inverse color transform).
+ * This can be seen as an adaptation of the general YCbCr->RGB
+ * conversion equation with Kr = Kb = 0, while replacing the
+ * normalization by modulo calculation.
+ */
+
+METHODDEF(void)
+rgb1_rgb_convert (j_decompress_ptr cinfo,
+		  JSAMPIMAGE input_buf, JDIMENSION input_row,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW rdiff_row, g_row, bdiff_row;
+  register JSAMPROW pixel;
+  register int rdiff, green, bdiff;
+  register JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  /* Planes arrive as [R-G, G, B-G]; output is interleaved RGB. */
+  for (; num_rows > 0; num_rows--, input_row++) {
+    rdiff_row = input_buf[0][input_row];
+    g_row     = input_buf[1][input_row];
+    bdiff_row = input_buf[2][input_row];
+    pixel = *output_buf++;
+    for (col = 0; col < width; col++, pixel += RGB_PIXELSIZE) {
+      rdiff = GETJSAMPLE(rdiff_row[col]);
+      green = GETJSAMPLE(g_row[col]);
+      bdiff = GETJSAMPLE(bdiff_row[col]);
+      /* Add green back to planes 0 and 2 and remove the CENTERJSAMPLE
+       * bias.  Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
+       * (modulo) operator is equivalent to the bitmask operator AND.
+       */
+      pixel[RGB_RED]   = (JSAMPLE) ((rdiff + green - CENTERJSAMPLE) & MAXJSAMPLE);
+      pixel[RGB_GREEN] = (JSAMPLE) green;
+      pixel[RGB_BLUE]  = (JSAMPLE) ((bdiff + green - CENTERJSAMPLE) & MAXJSAMPLE);
+    }
+  }
+}
+
+
+/*
+ * [R-G,G,B-G] to grayscale conversion with modulo calculation
+ * (inverse color transform).
+ */
+
+METHODDEF(void)
+rgb1_gray_convert (j_decompress_ptr cinfo,
+		   JSAMPIMAGE input_buf, JDIMENSION input_row,
+		   JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr conv = (my_cconvert_ptr) cinfo->cconvert;
+  register INT32 * ytab = conv->rgb_y_tab;
+  register JSAMPROW rdiff_row, g_row, bdiff_row;
+  register JSAMPROW gray_row;
+  register INT32 luma;
+  register int r, g, b;
+  register JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (; num_rows > 0; num_rows--, input_row++) {
+    rdiff_row = input_buf[0][input_row];
+    g_row     = input_buf[1][input_row];
+    bdiff_row = input_buf[2][input_row];
+    gray_row = *output_buf++;
+    for (col = 0; col < width; col++) {
+      g = GETJSAMPLE(g_row[col]);
+      /* Undo the subtract-green transform on planes 0 and 2 first.
+       * Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
+       * (modulo) operator is equivalent to the bitmask operator AND.
+       */
+      r = (GETJSAMPLE(rdiff_row[col]) + g - CENTERJSAMPLE) & MAXJSAMPLE;
+      b = (GETJSAMPLE(bdiff_row[col]) + g - CENTERJSAMPLE) & MAXJSAMPLE;
+      /* Y: sum the three table contributions (fixed point, SCALEBITS
+       * fraction bits), then shift back down to a sample value.
+       */
+      luma = ytab[r+R_Y_OFF] + ytab[g+G_Y_OFF] + ytab[b+B_Y_OFF];
+      gray_row[col] = (JSAMPLE) (luma >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * No colorspace change, but conversion from separate-planes
+ * to interleaved representation.
+ */
+
+METHODDEF(void)
+rgb_convert (j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION input_row,
+	     JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW r_row, g_row, b_row;
+  register JSAMPROW pixel;
+  register JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  /* Sample values pass through untouched; only the layout changes from
+   * three separate planes to RGB_PIXELSIZE-sample interleaved pixels. */
+  for (; num_rows > 0; num_rows--, input_row++) {
+    r_row = input_buf[0][input_row];
+    g_row = input_buf[1][input_row];
+    b_row = input_buf[2][input_row];
+    pixel = *output_buf++;
+    for (col = 0; col < width; col++, pixel += RGB_PIXELSIZE) {
+      /* We can dispense with GETJSAMPLE() here */
+      pixel[RGB_RED]   = r_row[col];
+      pixel[RGB_GREEN] = g_row[col];
+      pixel[RGB_BLUE]  = b_row[col];
+    }
+  }
+}
+
+
+/*
+ * Color conversion for no colorspace change: just copy the data,
+ * converting from separate-planes to interleaved representation.
+ */
+
+METHODDEF(void)
+null_convert (j_decompress_ptr cinfo,
+	      JSAMPIMAGE input_buf, JDIMENSION input_row,
+	      JSAMPARRAY output_buf, int num_rows)
+{
+  register int stride = cinfo->num_components;
+  register JSAMPROW src;
+  register JSAMPROW dst;
+  register JDIMENSION remaining;
+  JDIMENSION width = cinfo->output_width;
+  int comp;
+
+  for (; num_rows > 0; num_rows--, input_row++, output_buf++) {
+    for (comp = 0; comp < stride; comp++) {
+      /* Component comp fills every stride-th sample of the output row,
+       * starting at offset comp.  No GETJSAMPLE() needed for a raw copy. */
+      src = input_buf[comp][input_row];
+      dst = *output_buf + comp;
+      for (remaining = width; remaining > 0; remaining--) {
+        *dst = *src++;
+        dst += stride;
+      }
+    }
+  }
+}
+
+
+/*
+ * Color conversion for grayscale: just copy the data.
+ * This also works for YCC -> grayscale conversion, in which
+ * we just copy the Y (luminance) component and ignore chrominance.
+ */
+
+METHODDEF(void)
+grayscale_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+		   JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+{
+  JSAMPARRAY first_plane = input_buf[0];	/* the only plane that is read */
+  jcopy_sample_rows(first_plane, (int) input_row, output_buf, 0,
+		    num_rows, cinfo->output_width);
+}
+
+
+/*
+ * Convert grayscale to RGB: just duplicate the graylevel three times.
+ * This is provided to support applications that don't want to cope
+ * with grayscale as a separate case.
+ */
+
+METHODDEF(void)
+gray_rgb_convert (j_decompress_ptr cinfo,
+		  JSAMPIMAGE input_buf, JDIMENSION input_row,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW gray_row;
+  register JSAMPROW pixel;
+  register JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (; num_rows > 0; num_rows--, input_row++) {
+    gray_row = input_buf[0][input_row];
+    pixel = *output_buf++;
+    for (col = 0; col < width; col++, pixel += RGB_PIXELSIZE) {
+      /* The same gray level goes into all three slots of the pixel.
+       * We can dispense with GETJSAMPLE() here. */
+      pixel[RGB_RED] = pixel[RGB_GREEN] = pixel[RGB_BLUE] = gray_row[col];
+    }
+  }
+}
+
+
+/*
+ * Adobe-style YCCK->CMYK conversion.
+ * We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same
+ * conversion as above, while passing K (black) unchanged.
+ * We assume build_ycc_rgb_table has been called.
+ */
+
+METHODDEF(void)
+ycck_cmyk_convert (j_decompress_ptr cinfo,
+		   JSAMPIMAGE input_buf, JDIMENSION input_row,
+		   JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr conv = (my_cconvert_ptr) cinfo->cconvert;
+  /* Table bases and the range-limit table go into locals for the inner loop. */
+  register JSAMPLE * limit = cinfo->sample_range_limit;
+  register int * r_tab = conv->Cr_r_tab;
+  register int * b_tab = conv->Cb_b_tab;
+  register INT32 * crg_tab = conv->Cr_g_tab;
+  register INT32 * cbg_tab = conv->Cb_g_tab;
+  register JSAMPROW y_row, cb_row, cr_row, k_row;
+  register JSAMPROW pixel;
+  register int luma, cb, cr, g_offset;
+  register JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  SHIFT_TEMPS
+
+  /* One pass per row: four planes in, 4-sample interleaved pixels out. */
+  for (; num_rows > 0; num_rows--, input_row++) {
+    y_row  = input_buf[0][input_row];
+    cb_row = input_buf[1][input_row];
+    cr_row = input_buf[2][input_row];
+    k_row  = input_buf[3][input_row];
+    pixel = *output_buf++;
+    for (col = 0; col < width; col++, pixel += 4) {
+      luma = GETJSAMPLE(y_row[col]);
+      cb   = GETJSAMPLE(cb_row[col]);
+      cr   = GETJSAMPLE(cr_row[col]);
+      /* Both G entries are still scaled up: add first, then shift once. */
+      g_offset = (int) RIGHT_SHIFT(cbg_tab[cb] + crg_tab[cr], SCALEBITS);
+      /* Each output is MAXJSAMPLE minus the corresponding R/G/B value.
+       * Range-limiting is essential due to noise introduced by DCT losses,
+       * and for extended gamut encodings (sYCC).
+       */
+      pixel[0] = limit[MAXJSAMPLE - (luma + r_tab[cr])];	/* red */
+      pixel[1] = limit[MAXJSAMPLE - (luma + g_offset)];	/* green */
+      pixel[2] = limit[MAXJSAMPLE - (luma + b_tab[cb])];	/* blue */
+      /* K passes through unchanged */
+      pixel[3] = k_row[col];	/* don't need GETJSAMPLE here */
+    }
+  }
+}
+
+
+/*
+ * Empty method for start_pass.
+ */
+
+METHODDEF(void)
+start_pass_dcolor (j_decompress_ptr cinfo)
+{
+  /* no work needed: intentionally empty, cinfo is not touched */
+}
+
+
+/*
+ * Module initialization routine for output colorspace conversion.
+ */
+
+GLOBAL(void)
+jinit_color_deconverter (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert;
+  int ci;
+
+  cconvert = (my_cconvert_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_color_deconverter));
+  cinfo->cconvert = &cconvert->pub;	/* only the public part is exposed */
+  cconvert->pub.start_pass = start_pass_dcolor;	/* an empty routine */
+
+  /* Make sure num_components agrees with jpeg_color_space, else ERREXIT */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->num_components != 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_RGB:
+  case JCS_YCbCr:
+  case JCS_BG_RGB:
+  case JCS_BG_YCC:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_CMYK:
+  case JCS_YCCK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  default:			/* JCS_UNKNOWN can be anything with >= 1 component */
+    if (cinfo->num_components < 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+  }
+
+  /* Support a nonzero color_transform only for JCS_RGB and JCS_BG_RGB input */
+  if (cinfo->color_transform &&
+      cinfo->jpeg_color_space != JCS_RGB &&
+      cinfo->jpeg_color_space != JCS_BG_RGB)
+    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+
+  /* Set out_color_components and conversion method based on requested space,
+   * and build any lookup table that method reads.  Also clear component_needed
+   * for unused components so earlier pipeline stages can skip useless work.
+   */
+
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+    cinfo->out_color_components = 1;
+    switch (cinfo->jpeg_color_space) {
+    case JCS_GRAYSCALE:
+    case JCS_YCbCr:
+    case JCS_BG_YCC:
+      cconvert->pub.color_convert = grayscale_convert;
+      /* For color->grayscale conversion, only the Y (0) component is needed */
+      for (ci = 1; ci < cinfo->num_components; ci++)
+	cinfo->comp_info[ci].component_needed = FALSE;
+      break;
+    case JCS_RGB:
+      switch (cinfo->color_transform) {
+      case JCT_NONE:
+	cconvert->pub.color_convert = rgb_gray_convert;
+	break;
+      case JCT_SUBTRACT_GREEN:
+	cconvert->pub.color_convert = rgb1_gray_convert;
+	break;
+      default:
+	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+      }
+      build_rgb_y_table(cinfo);	/* read by both RGB->gray methods above */
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  case JCS_RGB:
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    switch (cinfo->jpeg_color_space) {
+    case JCS_GRAYSCALE:
+      cconvert->pub.color_convert = gray_rgb_convert;
+      break;
+    case JCS_YCbCr:
+      cconvert->pub.color_convert = ycc_rgb_convert;
+      build_ycc_rgb_table(cinfo);
+      break;
+    case JCS_BG_YCC:
+      cconvert->pub.color_convert = ycc_rgb_convert;	/* same routine, bg-sYCC tables */
+      build_bg_ycc_rgb_table(cinfo);
+      break;
+    case JCS_RGB:
+      switch (cinfo->color_transform) {
+      case JCT_NONE:
+	cconvert->pub.color_convert = rgb_convert;
+	break;
+      case JCT_SUBTRACT_GREEN:
+	cconvert->pub.color_convert = rgb1_rgb_convert;
+	break;
+      default:
+	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+      }
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  case JCS_BG_RGB:
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    if (cinfo->jpeg_color_space == JCS_BG_RGB) {
+      switch (cinfo->color_transform) {
+      case JCT_NONE:
+	cconvert->pub.color_convert = rgb_convert;
+	break;
+      case JCT_SUBTRACT_GREEN:
+	cconvert->pub.color_convert = rgb1_rgb_convert;
+	break;
+      default:
+	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+      }
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_CMYK:
+    cinfo->out_color_components = 4;
+    switch (cinfo->jpeg_color_space) {
+    case JCS_YCCK:
+      cconvert->pub.color_convert = ycck_cmyk_convert;
+      build_ycc_rgb_table(cinfo);	/* ycck_cmyk_convert reads these tables */
+      break;
+    case JCS_CMYK:
+      cconvert->pub.color_convert = null_convert;
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  default:
+    /* Permit null conversion to same output space; all components are kept */
+    if (cinfo->out_color_space == cinfo->jpeg_color_space) {
+      cinfo->out_color_components = cinfo->num_components;
+      cconvert->pub.color_convert = null_convert;
+    } else			/* unsupported non-null conversion */
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+  }
+
+  if (cinfo->quantize_colors)
+    cinfo->output_components = 1; /* single colormapped output component */
+  else
+    cinfo->output_components = cinfo->out_color_components;
+}
diff --git a/libraries/jpeg/jdct.h b/libraries/jpeg/jdct.h
new file mode 100644
index 000000000..bcfedfcfd
--- /dev/null
+++ b/libraries/jpeg/jdct.h
@@ -0,0 +1,416 @@
+/*
+ * jdct.h
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2002-2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This include file contains common declarations for the forward and
+ * inverse DCT modules.  These declarations are private to the DCT managers
+ * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
+ * The individual DCT algorithms are kept in separate files to ease 
+ * machine-dependent tuning (e.g., assembly coding).
+ */
+
+
+/*
+ * A forward DCT routine is given a pointer to an input sample array and
+ * a pointer to a work area of type DCTELEM[]; the DCT is to be performed
+ * in-place in that buffer.  Type DCTELEM is int for 8-bit samples, INT32
+ * for 12-bit samples.  (NOTE: Floating-point DCT implementations use an
+ * array of type FAST_FLOAT, instead.)
+ * The input data is to be fetched from the sample array starting at a
+ * specified column.  (Any row offset needed will be applied to the array
+ * pointer before it is passed to the FDCT code.)
+ * Note that the number of samples fetched by the FDCT routine is
+ * DCT_h_scaled_size * DCT_v_scaled_size.
+ * The DCT outputs are returned scaled up by a factor of 8; they therefore
+ * have a range of +-8K for 8-bit data, +-128K for 12-bit data.  This
+ * convention improves accuracy in integer implementations and saves some
+ * work in floating-point ones.
+ * Quantization of the output coefficients is done by jcdctmgr.c.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef int DCTELEM;		/* 8-bit samples: FDCT outputs stay within +-8K, so 16 or 32 bits is fine */
+#else
+typedef INT32 DCTELEM;		/* other depths (12-bit outputs reach +-128K): must have 32 bits */
+#endif
+
+typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data,	/* work area; DCT is done in place here */
+					       JSAMPARRAY sample_data,	/* input sample array */
+					       JDIMENSION start_col));	/* column at which fetching starts */
+typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data,	/* same role, FAST_FLOAT work area */
+					     JSAMPARRAY sample_data,	/* input sample array */
+					     JDIMENSION start_col));	/* column at which fetching starts */
+
+
+/*
+ * An inverse DCT routine is given a pointer to the input JBLOCK and a pointer
+ * to an output sample array.  The routine must dequantize the input data as
+ * well as perform the IDCT; for dequantization, it uses the multiplier table
+ * pointed to by compptr->dct_table.  The output data is to be placed into the
+ * sample array starting at a specified column.  (Any row offset needed will
+ * be applied to the array pointer before it is passed to the IDCT code.)
+ * Note that the number of samples emitted by the IDCT routine is
+ * DCT_h_scaled_size * DCT_v_scaled_size.
+ */
+
+/* typedef inverse_DCT_method_ptr is declared in jpegint.h */
+
+/*
+ * Each IDCT routine has its own ideas about the best dct_table element type.
+ */
+
+typedef MULTIPLIER ISLOW_MULT_TYPE; /* ISLOW dct_table element: short or int, whichever is faster */
+#if BITS_IN_JSAMPLE == 8
+typedef MULTIPLIER IFAST_MULT_TYPE; /* IFAST dct_table element: 16 bits is OK, use short if faster */
+#define IFAST_SCALE_BITS  2	/* fractional bits in IFAST scale factors (8-bit samples) */
+#else
+typedef INT32 IFAST_MULT_TYPE;	/* IFAST dct_table element: need 32 bits for scaled quantizers */
+#define IFAST_SCALE_BITS  13	/* fractional bits in IFAST scale factors (non-8-bit samples) */
+#endif
+typedef FAST_FLOAT FLOAT_MULT_TYPE; /* float dct_table element: preferred floating type */
+
+
+/*
+ * Each IDCT routine is responsible for range-limiting its results and
+ * converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
+ * be quite far out of range if the input data is corrupt, so a bulletproof
+ * range-limiting step is required.  We use a mask-and-table-lookup method
+ * to do the combined operations quickly, assuming that RANGE_CENTER
+ * (defined in jpegint.h) is a power of 2.  See the comments with
+ * prepare_range_limit_table (in jdmaster.c) for more info.
+ */
+
+#define RANGE_MASK  (RANGE_CENTER * 2 - 1)	/* all-ones index mask, given RANGE_CENTER is a power of 2 */
+#define RANGE_SUBSET  (RANGE_CENTER - CENTERJSAMPLE)	/* amount by which RANGE_CENTER exceeds CENTERJSAMPLE */
+
+#define IDCT_range_limit(cinfo)  ((cinfo)->sample_range_limit - RANGE_SUBSET)	/* sample_range_limit moved back by RANGE_SUBSET */
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES	/* jFD* = forward DCT, jRD* = inverse DCT; suffix mirrors the WxH of the long name */
+#define jpeg_fdct_islow		jFDislow
+#define jpeg_fdct_ifast		jFDifast
+#define jpeg_fdct_float		jFDfloat
+#define jpeg_fdct_7x7		jFD7x7
+#define jpeg_fdct_6x6		jFD6x6
+#define jpeg_fdct_5x5		jFD5x5
+#define jpeg_fdct_4x4		jFD4x4
+#define jpeg_fdct_3x3		jFD3x3
+#define jpeg_fdct_2x2		jFD2x2
+#define jpeg_fdct_1x1		jFD1x1
+#define jpeg_fdct_9x9		jFD9x9
+#define jpeg_fdct_10x10		jFD10x10
+#define jpeg_fdct_11x11		jFD11x11
+#define jpeg_fdct_12x12		jFD12x12
+#define jpeg_fdct_13x13		jFD13x13
+#define jpeg_fdct_14x14		jFD14x14
+#define jpeg_fdct_15x15		jFD15x15
+#define jpeg_fdct_16x16		jFD16x16
+#define jpeg_fdct_16x8		jFD16x8
+#define jpeg_fdct_14x7		jFD14x7
+#define jpeg_fdct_12x6		jFD12x6
+#define jpeg_fdct_10x5		jFD10x5
+#define jpeg_fdct_8x4		jFD8x4
+#define jpeg_fdct_6x3		jFD6x3
+#define jpeg_fdct_4x2		jFD4x2
+#define jpeg_fdct_2x1		jFD2x1
+#define jpeg_fdct_8x16		jFD8x16
+#define jpeg_fdct_7x14		jFD7x14
+#define jpeg_fdct_6x12		jFD6x12
+#define jpeg_fdct_5x10		jFD5x10
+#define jpeg_fdct_4x8		jFD4x8
+#define jpeg_fdct_3x6		jFD3x6
+#define jpeg_fdct_2x4		jFD2x4
+#define jpeg_fdct_1x2		jFD1x2
+#define jpeg_idct_islow		jRDislow
+#define jpeg_idct_ifast		jRDifast
+#define jpeg_idct_float		jRDfloat
+#define jpeg_idct_7x7		jRD7x7
+#define jpeg_idct_6x6		jRD6x6
+#define jpeg_idct_5x5		jRD5x5
+#define jpeg_idct_4x4		jRD4x4
+#define jpeg_idct_3x3		jRD3x3
+#define jpeg_idct_2x2		jRD2x2
+#define jpeg_idct_1x1		jRD1x1
+#define jpeg_idct_9x9		jRD9x9
+#define jpeg_idct_10x10		jRD10x10
+#define jpeg_idct_11x11		jRD11x11
+#define jpeg_idct_12x12		jRD12x12
+#define jpeg_idct_13x13		jRD13x13
+#define jpeg_idct_14x14		jRD14x14
+#define jpeg_idct_15x15		jRD15x15
+#define jpeg_idct_16x16		jRD16x16
+#define jpeg_idct_16x8		jRD16x8
+#define jpeg_idct_14x7		jRD14x7
+#define jpeg_idct_12x6		jRD12x6
+#define jpeg_idct_10x5		jRD10x5
+#define jpeg_idct_8x4		jRD8x4
+#define jpeg_idct_6x3		jRD6x3
+#define jpeg_idct_4x2		jRD4x2
+#define jpeg_idct_2x1		jRD2x1
+#define jpeg_idct_8x16		jRD8x16
+#define jpeg_idct_7x14		jRD7x14
+#define jpeg_idct_6x12		jRD6x12
+#define jpeg_idct_5x10		jRD5x10
+#define jpeg_idct_4x8		jRD4x8
+#define jpeg_idct_3x6		jRD3x6
+#define jpeg_idct_2x4		jRD2x4
+#define jpeg_idct_1x2		jRD1x2
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+/* Extern declarations for the forward and inverse DCT routines.  Forward routines take the forward_DCT_method_ptr argument list; jpeg_fdct_float takes the float_DCT_method_ptr one. */
+
+EXTERN(void) jpeg_fdct_islow
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_ifast
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_float
+    JPP((FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_7x7
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_6x6
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_5x5
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_4x4
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_3x3
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_2x2
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_1x1
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_9x9
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_10x10
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_11x11
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_12x12
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_13x13
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_14x14
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_15x15
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_16x16
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_16x8
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_14x7
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_12x6
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_10x5
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_8x4
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_6x3
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_4x2
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_2x1
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_8x16
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_7x14
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_6x12
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_5x10
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_4x8
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_3x6
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_2x4
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_1x2
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+/* Inverse DCT entry points: every routine below is declared with the same five-argument list (cinfo, compptr, coef_block, output_buf, output_col). */
+EXTERN(void) jpeg_idct_islow
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_ifast
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_float
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_7x7
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_6x6
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_5x5
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x4
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_3x3
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_1x1
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_9x9
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_10x10
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_11x11
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_12x12
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_13x13
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_14x14
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_15x15
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_16x16
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_16x8
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_14x7
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_12x6
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_10x5
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_8x4
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_6x3
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x1
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_8x16
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_7x14
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_6x12
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_5x10
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x8
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_3x6
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x4
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_1x2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+
+
+/*
+ * Macros for handling fixed-point arithmetic; these are used by many
+ * but not all of the DCT/IDCT modules.
+ *
+ * All values are expected to be of type INT32.
+ * Fractional constants are scaled left by CONST_BITS bits.
+ * CONST_BITS is defined within each module using these macros,
+ * and may differ from one module to the next.
+ */
+
+#define ONE	((INT32) 1)	/* INT32-typed 1, so the shifts built on it are done in INT32 */
+#define CONST_SCALE (ONE << CONST_BITS)	/* 2^CONST_BITS; CONST_BITS is supplied by the including module */
+
+/* Convert a positive real constant to an integer scaled by CONST_SCALE.
+ * Caution: some C compilers fail to reduce "FIX(constant)" at compile time,
+ * thus causing a lot of useless floating-point operations at run time.
+ */
+
+#define FIX(x)	((INT32) ((x) * CONST_SCALE + 0.5))	/* +0.5 before the truncating cast rounds to nearest for x >= 0 */
+
+/* Descale and correctly round an INT32 value that's scaled by N bits.
+ * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
+ * the fudge factor is correct for either sign of X.
+ */
+
+#define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)	/* adds 2^(n-1) (half an output unit) before shifting; n appears twice and must be >= 1 */
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * This macro is used only when the two inputs will actually be no more than
+ * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a
+ * full 32x32 multiply.  This provides a useful speedup on many machines.
+ * Unfortunately there is no way to specify a 16x16->32 multiply portably
+ * in C, but some C compilers will do the right thing if you provide the
+ * correct combination of casts.
+ */
+
+#ifdef SHORTxSHORT_32		/* both operands cast to INT16; may work if 'int' is 32 bits */
+#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT16) (const)))
+#endif
+#ifdef SHORTxLCONST_32		/* INT16 variable times INT32 constant; known to work with Microsoft C 6.0 */
+#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT32) (const)))
+#endif
+
+#ifndef MULTIPLY16C16		/* default definition: plain multiply with no narrowing casts */
+#define MULTIPLY16C16(var,const)  ((var) * (const))
+#endif
+
+/* Same except both inputs are variables. */
+
+#ifdef SHORTxSHORT_32		/* both variables cast to INT16; may work if 'int' is 32 bits */
+#define MULTIPLY16V16(var1,var2)  (((INT16) (var1)) * ((INT16) (var2)))
+#endif
+
+#ifndef MULTIPLY16V16		/* default definition: plain multiply with no narrowing casts */
+#define MULTIPLY16V16(var1,var2)  ((var1) * (var2))
+#endif
+
+/* Like RIGHT_SHIFT, but applies to a DCTELEM.
+ * We assume that int right shift is unsigned if INT32 right shift is.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED	/* >> zero-fills: for negative x, OR ones back into the top shft bits */
+#define ISHIFT_TEMPS	DCTELEM ishift_temp;	/* scratch variable that IRIGHT_SHIFT assigns to; declare it in the caller */
+#if BITS_IN_JSAMPLE == 8
+#define DCTELEMBITS  16		/* DCTELEM may be 16 or 32 bits */
+#else
+#define DCTELEMBITS  32		/* DCTELEM must be 32 bits */
+#endif
+#define IRIGHT_SHIFT(x,shft)  \
+    ((ishift_temp = (x)) < 0 ? \
+     (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \
+     (ishift_temp >> (shft)))
+#else	/* >> is assumed to sign-extend, so it is used directly */
+#define ISHIFT_TEMPS	/* expands to nothing: no scratch variable needed */
+#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
diff --git a/libraries/jpeg/jddctmgr.c b/libraries/jpeg/jddctmgr.c
new file mode 100644
index 000000000..9ecfbb510
--- /dev/null
+++ b/libraries/jpeg/jddctmgr.c
@@ -0,0 +1,384 @@
+/*
+ * jddctmgr.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2002-2013 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the inverse-DCT management logic.
+ * This code selects a particular IDCT implementation to be used,
+ * and it performs related housekeeping chores.  No code in this file
+ * is executed per IDCT step, only during output pass setup.
+ *
+ * Note that the IDCT routines are responsible for performing coefficient
+ * dequantization as well as the IDCT proper.  This module sets up the
+ * dequantization multiplier table needed by the IDCT routine.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+
+/*
+ * The decompressor input side (jdinput.c) saves away the appropriate
+ * quantization table for each component at the start of the first scan
+ * involving that component.  (This is necessary in order to correctly
+ * decode files that reuse Q-table slots.)
+ * When we are ready to make an output pass, the saved Q-table is converted
+ * to a multiplier table that will actually be used by the IDCT routine.
+ * The multiplier table contents are IDCT-method-dependent.  To support
+ * application changes in IDCT method between scans, we can remake the
+ * multiplier tables if necessary.
+ * In buffered-image mode, the first output pass may occur before any data
+ * has been seen for some components, and thus before their Q-tables have
+ * been saved away.  To handle this case, multiplier tables are preset
+ * to zeroes; the result of the IDCT will be a neutral gray level.
+ */
+
+
+/* Private subobject for this module */
+
+typedef struct {
+  struct jpeg_inverse_dct pub;	/* public fields; cinfo->idct points here and is cast back to my_idct_ptr */
+
+  /* This array contains the IDCT method code that each multiplier table
+   * is currently set up for, or -1 if it's not yet set up.
+   * The actual multiplier tables are pointed to by dct_table in the
+   * per-component comp_info structures.
+   */
+  int cur_method[MAX_COMPONENTS];	/* indexed by component index ci */
+} my_idct_controller;
+
+typedef my_idct_controller * my_idct_ptr;	/* pointer form used throughout this module */
+
+
+/* Allocated multiplier tables: big enough for any supported variant */
+
+typedef union {
+  ISLOW_MULT_TYPE islow_array[DCTSIZE2];	/* always present */
+#ifdef DCT_IFAST_SUPPORTED
+  IFAST_MULT_TYPE ifast_array[DCTSIZE2];	/* only when the IFAST method is compiled in */
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  FLOAT_MULT_TYPE float_array[DCTSIZE2];	/* only when the float method is compiled in */
+#endif
+} multiplier_table;	/* its SIZEOF() is the allocation size of each dct_table */
+
+
+/* The current scaled-IDCT routines require ISLOW-style multiplier tables,
+ * so be sure to compile that code if either ISLOW or SCALING is requested.
+ */
+#if defined(DCT_ISLOW_SUPPORTED) || defined(IDCT_SCALING_SUPPORTED)
+/* Either option alone needs the ISLOW-style multiplier-table builder
+ * in start_pass: the plain ISLOW IDCT and every scaled IDCT both read
+ * ISLOW_MULT_TYPE tables.
+ */
+#define PROVIDE_ISLOW_TABLES
+#endif
+
+
+/*
+ * Prepare for an output pass (installed as idct->pub.start_pass).
+ * For each component: pick the IDCT routine for its scaled block size, store
+ * it in pub.inverse_DCT[ci], and (re)build its dct_table when that is needed.
+ */
+
+METHODDEF(void)
+start_pass (j_decompress_ptr cinfo)
+{
+  my_idct_ptr idct = (my_idct_ptr) cinfo->idct;
+  int ci, i;
+  jpeg_component_info *compptr;
+  int method = 0;		/* JDCT_* code naming the table style method_ptr needs */
+  inverse_DCT_method_ptr method_ptr = NULL;	/* IDCT routine chosen for this component */
+  JQUANT_TBL * qtbl;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Select the IDCT routine; the switch key packs (h_scaled_size << 8) + v_scaled_size */
+    switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
+#ifdef IDCT_SCALING_SUPPORTED
+    case ((1 << 8) + 1):
+      method_ptr = jpeg_idct_1x1;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((2 << 8) + 2):
+      method_ptr = jpeg_idct_2x2;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((3 << 8) + 3):
+      method_ptr = jpeg_idct_3x3;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((4 << 8) + 4):
+      method_ptr = jpeg_idct_4x4;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((5 << 8) + 5):
+      method_ptr = jpeg_idct_5x5;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((6 << 8) + 6):
+      method_ptr = jpeg_idct_6x6;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((7 << 8) + 7):
+      method_ptr = jpeg_idct_7x7;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((9 << 8) + 9):
+      method_ptr = jpeg_idct_9x9;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((10 << 8) + 10):
+      method_ptr = jpeg_idct_10x10;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((11 << 8) + 11):
+      method_ptr = jpeg_idct_11x11;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((12 << 8) + 12):
+      method_ptr = jpeg_idct_12x12;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((13 << 8) + 13):
+      method_ptr = jpeg_idct_13x13;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((14 << 8) + 14):
+      method_ptr = jpeg_idct_14x14;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((15 << 8) + 15):
+      method_ptr = jpeg_idct_15x15;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((16 << 8) + 16):
+      method_ptr = jpeg_idct_16x16;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((16 << 8) + 8):
+      method_ptr = jpeg_idct_16x8;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((14 << 8) + 7):
+      method_ptr = jpeg_idct_14x7;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((12 << 8) + 6):
+      method_ptr = jpeg_idct_12x6;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((10 << 8) + 5):
+      method_ptr = jpeg_idct_10x5;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((8 << 8) + 4):
+      method_ptr = jpeg_idct_8x4;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((6 << 8) + 3):
+      method_ptr = jpeg_idct_6x3;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((4 << 8) + 2):
+      method_ptr = jpeg_idct_4x2;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((2 << 8) + 1):
+      method_ptr = jpeg_idct_2x1;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((8 << 8) + 16):
+      method_ptr = jpeg_idct_8x16;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((7 << 8) + 14):
+      method_ptr = jpeg_idct_7x14;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((6 << 8) + 12):
+      method_ptr = jpeg_idct_6x12;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((5 << 8) + 10):
+      method_ptr = jpeg_idct_5x10;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((4 << 8) + 8):
+      method_ptr = jpeg_idct_4x8;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((3 << 8) + 6):
+      method_ptr = jpeg_idct_3x6;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((2 << 8) + 4):
+      method_ptr = jpeg_idct_2x4;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((1 << 8) + 2):
+      method_ptr = jpeg_idct_1x2;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+#endif
+    case ((DCTSIZE << 8) + DCTSIZE):	/* DCTSIZE x DCTSIZE block: cinfo->dct_method picks the routine */
+      switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+      case JDCT_ISLOW:
+	method_ptr = jpeg_idct_islow;
+	method = JDCT_ISLOW;
+	break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+      case JDCT_IFAST:
+	method_ptr = jpeg_idct_ifast;
+	method = JDCT_IFAST;
+	break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+      case JDCT_FLOAT:
+	method_ptr = jpeg_idct_float;
+	method = JDCT_FLOAT;
+	break;
+#endif
+      default:
+	ERREXIT(cinfo, JERR_NOT_COMPILED);	/* requested dct_method has no case compiled in */
+	break;
+      }
+      break;
+    default:			/* size pair matches no case above */
+      ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
+	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
+      break;
+    }
+    idct->pub.inverse_DCT[ci] = method_ptr;
+    /* Create multiplier table from quant table.
+     * However, we can skip this if the component is uninteresting
+     * or if we already built the table.  Also, if no quant table
+     * has yet been saved for the component, we leave the
+     * multiplier table all-zero; we'll be reading zeroes from the
+     * coefficient controller's buffer anyway.
+     */
+    if (! compptr->component_needed || idct->cur_method[ci] == method)
+      continue;
+    qtbl = compptr->quant_table;
+    if (qtbl == NULL)		/* happens if no data yet for component */
+      continue;
+    idct->cur_method[ci] = method;	/* recorded so the check above can skip a rebuild next time */
+    switch (method) {
+#ifdef PROVIDE_ISLOW_TABLES
+    case JDCT_ISLOW:
+      {
+	/* For LL&M IDCT method, multipliers are equal to raw quantization
+	 * coefficients, but are stored as ints to ensure access efficiency.
+	 */
+	ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+	for (i = 0; i < DCTSIZE2; i++) {
+	  ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
+	}
+      }
+      break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+    case JDCT_IFAST:
+      {
+	/* For AA&N IDCT method, multipliers are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * For integer operation, the multiplier table is to be scaled by
+	 * IFAST_SCALE_BITS.
+	 */
+	IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
+#define CONST_BITS 14	/* scale of aanscales[]; used in the DESCALE shift count below */
+	static const INT16 aanscales[DCTSIZE2] = {
+	  /* precomputed values scaled up by 14 bits */
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+	};
+	SHIFT_TEMPS
+
+	for (i = 0; i < DCTSIZE2; i++) {
+	  ifmtbl[i] = (IFAST_MULT_TYPE)
+	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+				  (INT32) aanscales[i]),
+		    CONST_BITS-IFAST_SCALE_BITS);
+	}
+      }
+      break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+    case JDCT_FLOAT:
+      {
+	/* For float AA&N IDCT method, multipliers are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * We apply a further scale factor of 1/8.
+	 */
+	FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
+	int row, col;
+	static const double aanscalefactor[DCTSIZE] = {
+	  1.0, 1.387039845, 1.306562965, 1.175875602,
+	  1.0, 0.785694958, 0.541196100, 0.275899379
+	};
+
+	i = 0;			/* i walks quantval[] and fmtbl[] in row-major order */
+	for (row = 0; row < DCTSIZE; row++) {
+	  for (col = 0; col < DCTSIZE; col++) {
+	    fmtbl[i] = (FLOAT_MULT_TYPE)
+	      ((double) qtbl->quantval[i] *
+	       aanscalefactor[row] * aanscalefactor[col] * 0.125);
+	    i++;
+	  }
+	}
+      }
+      break;
+#endif
+    default:			/* no table builder compiled in for this method code */
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+      break;
+    }
+  }
+}
+
+
+/*
+ * Module initialization for the inverse-DCT manager: creates the
+ * controller object and one zeroed multiplier table per component.
+ */
+GLOBAL(void)
+jinit_inverse_dct (j_decompress_ptr cinfo)
+{
+  my_idct_ptr controller;
+  jpeg_component_info *comp;
+  int idx;
+
+  /* Controller comes from the image pool and is published via cinfo->idct */
+  controller = (my_idct_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_idct_controller));
+  controller->pub.start_pass = start_pass;
+  cinfo->idct = &controller->pub;
+
+  comp = cinfo->comp_info;
+  for (idx = 0; idx < cinfo->num_components; idx++) {
+    /* Table is sized for the largest variant and starts out all-zero */
+    void * table = (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(multiplier_table));
+    MEMZERO(table, SIZEOF(multiplier_table));
+    comp[idx].dct_table = table;
+    /* -1 tells start_pass that no method's multipliers are loaded yet */
+    controller->cur_method[idx] = -1;
+  }
+}
diff --git a/libraries/jpeg/jdhuff.c b/libraries/jpeg/jdhuff.c
new file mode 100644
index 000000000..835d06ecb
--- /dev/null
+++ b/libraries/jpeg/jdhuff.c
@@ -0,0 +1,1553 @@
+/*
+ * jdhuff.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2006-2016 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy decoding routines.
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Much of the complexity here has to do with supporting input suspension.
+ * If the data source module demands suspension, we want to be able to back
+ * up to the start of the current MCU.  To do this, we copy state variables
+ * into local working storage, and update them back to the permanent
+ * storage only upon successful completion of an MCU.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Derived data constructed for each Huffman table */
+
+#define HUFF_LOOKAHEAD	8	/* # of bits of lookahead */
+
+typedef struct {		/* decoder tables derived from one JHUFF_TBL */
+  /* Basic tables: (element [0] of each array is unused) */
+  INT32 maxcode[18];		/* largest code of length k (-1 if none) */
+  /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
+  INT32 valoffset[17];		/* huffval[] offset for codes of length k */
+  /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
+   * the smallest code of length k; so given a code of length k, the
+   * corresponding symbol is huffval[code + valoffset[k]].  Entries for
+   * lengths with no codes are not written by jpeg_make_d_derived_tbl. */
+
+  /* Link to public Huffman table (needed only in jpeg_huff_decode) */
+  JHUFF_TBL *pub;
+
+  /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
+   * the input data stream.  If the next Huffman code is no more
+   * than HUFF_LOOKAHEAD bits long, we can obtain its length and
+   * the corresponding symbol directly from these tables.
+   */
+  int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
+  UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused (when look_nbits is 0) */
+} d_derived_tbl;
+
+
+/*
+ * Fetching the next N bits from the input stream is a time-critical operation
+ * for the Huffman decoders.  We implement it with a combination of inline
+ * macros and out-of-line subroutines.  Note that N (the number of bits
+ * demanded at one time) never exceeds 15 for JPEG use.
+ *
+ * We read source bytes into get_buffer and dole out bits as needed.
+ * If get_buffer already contains enough bits, they are fetched in-line
+ * by the macros CHECK_BIT_BUFFER and GET_BITS.  When there aren't enough
+ * bits, jpeg_fill_bit_buffer is called; it will attempt to fill get_buffer
+ * as full as possible (not just to the number of bits needed; this
+ * prefetching reduces the overhead cost of calling jpeg_fill_bit_buffer).
+ * Note that jpeg_fill_bit_buffer may return FALSE to indicate suspension.
+ * On TRUE return, jpeg_fill_bit_buffer guarantees that get_buffer contains
+ * at least the requested number of bits --- dummy zeroes are inserted if
+ * necessary.
+ */
+
+typedef INT32 bit_buf_type;	/* type of bit-extraction buffer (the get_buffer variables) */
+#define BIT_BUF_SIZE  32	/* size of buffer in bits; default MIN_GET_BITS is BIT_BUF_SIZE-7 */
+
+/* If long is > 32 bits on your machine, and shifting/masking longs is
+ * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
+ * appropriately should be a win.  Unfortunately we can't define the size
+ * with something like  #define BIT_BUF_SIZE (sizeof(bit_buf_type)*8)
+ * because not all machines measure sizeof in 8-bit bytes.
+ */
+
+typedef struct {		/* Bitreading state saved across MCUs (see BITREAD_LOAD_STATE / BITREAD_SAVE_STATE) */
+  bit_buf_type get_buffer;	/* current bit-extraction buffer */
+  int bits_left;		/* # of unused bits in it; finish_pass_huff resets this to 0 */
+} bitread_perm_state;
+
+typedef struct {		/* Bitreading working state within an MCU */
+  /* Current data source location */
+  /* We need a copy, rather than munging the original, in case of suspension */
+  const JOCTET * next_input_byte; /* => next byte to read from source */
+  size_t bytes_in_buffer;	/* # of bytes remaining in source buffer */
+  /* Bit input buffer --- note these values are kept in register variables,
+   * not in this struct, inside the inner loops.  jpeg_fill_bit_buffer and
+   * jpeg_huff_decode hand their updated values back through these fields. */
+  bit_buf_type get_buffer;	/* current bit-extraction buffer */
+  int bits_left;		/* # of unused bits in it */
+  /* Pointer needed by jpeg_fill_bit_buffer. */
+  j_decompress_ptr cinfo;	/* back link to decompress master record */
+} bitread_working_state;
+
+/* Macros to declare and load/save bitread local variables.  The other bit-fetch macros use the locals declared here by name. */
+#define BITREAD_STATE_VARS  \
+	register bit_buf_type get_buffer;  \
+	register int bits_left;  \
+	bitread_working_state br_state
+/* Load: copy the source position into br_state and the saved bit buffer into the locals (expands with its own final ';'). */
+#define BITREAD_LOAD_STATE(cinfop,permstate)  \
+	br_state.cinfo = cinfop; \
+	br_state.next_input_byte = cinfop->src->next_input_byte; \
+	br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
+	get_buffer = permstate.get_buffer; \
+	bits_left = permstate.bits_left;
+/* Save: write the source position and the bit buffer locals back (no final ';' in the expansion). */
+#define BITREAD_SAVE_STATE(cinfop,permstate)  \
+	cinfop->src->next_input_byte = br_state.next_input_byte; \
+	cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
+	permstate.get_buffer = get_buffer; \
+	permstate.bits_left = bits_left
+
+/*
+ * These macros provide the in-line portion of bit fetching.
+ * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer
+ * before using GET_BITS, PEEK_BITS, or DROP_BITS.
+ * The variables get_buffer and bits_left are assumed to be locals,
+ * but the state struct might not be (jpeg_huff_decode needs this).
+ *	CHECK_BIT_BUFFER(state,n,action);
+ *		Ensure there are N bits in get_buffer; if suspend, take action.
+ *      val = GET_BITS(n);
+ *		Fetch next N bits.
+ *      val = PEEK_BITS(n);
+ *		Fetch next N bits without removing them from the buffer.
+ *	DROP_BITS(n);
+ *		Discard next N bits.
+ * The value N should be a simple variable, not an expression, because it
+ * is evaluated multiple times.  (BIT_MASK is defined later in this file.)
+ */
+/* action runs when jpeg_fill_bit_buffer returns FALSE; otherwise the locals are reloaded from the state struct. */
+#define CHECK_BIT_BUFFER(state,nbits,action) \
+	{ if (bits_left < (nbits)) {  \
+	    if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits))  \
+	      { action; }  \
+	    get_buffer = (state).get_buffer; bits_left = (state).bits_left; } }
+/* GET_BITS decrements bits_left as part of the expression; PEEK_BITS leaves it alone. */
+#define GET_BITS(nbits) \
+	(((int) (get_buffer >> (bits_left -= (nbits)))) & BIT_MASK(nbits))
+
+#define PEEK_BITS(nbits) \
+	(((int) (get_buffer >> (bits_left -  (nbits)))) & BIT_MASK(nbits))
+
+#define DROP_BITS(nbits) \
+	(bits_left -= (nbits))
+
+
+/*
+ * Code for extracting next Huffman-coded symbol from input bit stream.
+ * Again, this is time-critical and we make the main paths be macros.
+ *
+ * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits
+ * without looping.  Usually, more than 95% of the Huffman codes will be 8
+ * or fewer bits long; longer codes go to the out-of-line jpeg_huff_decode.
+ *
+ * Notes about the HUFF_DECODE macro:
+ * 1. Near the end of the data segment, we may fail to get enough bits
+ *    for a lookahead.  In that case, we do it the hard way.
+ * 2. If the lookahead table contains no entry, the next code must be
+ *    more than HUFF_LOOKAHEAD bits long.
+ * 3. jpeg_huff_decode returns -1 if forced to suspend.
+ * 4. slowlabel is emitted as a goto label; pass a name unique in the caller.
+ */
+
+#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \
+{ register int nb, look; \
+  if (bits_left < HUFF_LOOKAHEAD) { \
+    if (! jpeg_fill_bit_buffer(&state,get_buffer,bits_left, 0)) {failaction;} \
+    get_buffer = state.get_buffer; bits_left = state.bits_left; \
+    if (bits_left < HUFF_LOOKAHEAD) { \
+      nb = 1; goto slowlabel; \
+    } \
+  } \
+  look = PEEK_BITS(HUFF_LOOKAHEAD); \
+  if ((nb = htbl->look_nbits[look]) != 0) { \
+    DROP_BITS(nb); \
+    result = htbl->look_sym[look]; \
+  } else { \
+    nb = HUFF_LOOKAHEAD+1; \
+slowlabel: \
+    if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
+	{ failaction; } \
+    get_buffer = state.get_buffer; bits_left = state.bits_left; \
+  } \
+}
+
+
+/*
+ * Expanded entropy decoder object for Huffman decoding.
+ *
+ * The savable_state subrecord holds fields that change within an MCU but
+ * must not be stored back until the MCU completes (copied via ASSIGN_STATE).
+ */
+
+typedef struct {
+  unsigned int EOBRUN;			/* remaining EOBs in EOBRUN; zeroed by process_restart */
+  int last_dc_val[MAX_COMPS_IN_SCAN];	/* last DC coef for each component; zeroed by process_restart */
+} savable_state;
+
+/* This macro is to work around compilers with missing or broken
+ * structure assignment.  The member-by-member fallback is written for
+ * MAX_COMPS_IN_SCAN == 4 only; with NO_STRUCT_ASSIGN and any other value
+ * no ASSIGN_STATE is defined here, so this code must then be extended. */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).EOBRUN = (src).EOBRUN, \
+	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif
+#endif
+
+
+typedef struct {		/* Huffman decoder object; cinfo->entropy is cast to a pointer to this */
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  /* These fields are loaded into local variables at start of each MCU.
+   * In case of suspension, we exit WITHOUT updating them.
+   */
+  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
+  savable_state saved;		/* Other state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  boolean insufficient_data;	/* set TRUE after emitting warning; process_restart clears it unless a marker is pending */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval; decremented once per MCU */
+
+  /* Following two fields used only in progressive mode */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  d_derived_tbl * derived_tbls[NUM_HUFF_TBLS]; /* decode_mcu_DC_first indexes this by dc_tbl_no */
+
+  d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */
+
+  /* Following fields used only in sequential mode */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
+  d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
+
+  /* Precalculated info set up by start_pass for use in decode_mcu: */
+
+  /* Pointers to derived tables to be used for each block within an MCU */
+  d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  /* Whether we care about the DC and AC coefficient values for each block */
+  int coef_limit[D_MAX_BLOCKS_IN_MCU];
+} huff_entropy_decoder;
+
+typedef huff_entropy_decoder * huff_entropy_ptr; /* pointer form used by the routines in this file */
+
+
+static const int jpeg_zigzag_order[8][8] = {	/* zigzag scan position of each [i][j] cell of an 8x8 block (a permutation of 0..63) */
+  {  0,  1,  5,  6, 14, 15, 27, 28 },
+  {  2,  4,  7, 13, 16, 26, 29, 42 },
+  {  3,  8, 12, 17, 25, 30, 41, 43 },
+  {  9, 11, 18, 24, 31, 40, 44, 53 },
+  { 10, 19, 23, 32, 39, 45, 52, 54 },
+  { 20, 22, 33, 38, 46, 51, 55, 60 },
+  { 21, 34, 37, 47, 50, 56, 59, 61 },
+  { 35, 36, 48, 49, 57, 58, 62, 63 }
+};
+
+static const int jpeg_zigzag_order7[7][7] = {	/* same numbering for a 7x7 block (0..48) */
+  {  0,  1,  5,  6, 14, 15, 27 },
+  {  2,  4,  7, 13, 16, 26, 28 },
+  {  3,  8, 12, 17, 25, 29, 38 },
+  {  9, 11, 18, 24, 30, 37, 39 },
+  { 10, 19, 23, 31, 36, 40, 45 },
+  { 20, 22, 32, 35, 41, 44, 46 },
+  { 21, 33, 34, 42, 43, 47, 48 }
+};
+
+static const int jpeg_zigzag_order6[6][6] = {	/* 6x6 block (0..35) */
+  {  0,  1,  5,  6, 14, 15 },
+  {  2,  4,  7, 13, 16, 25 },
+  {  3,  8, 12, 17, 24, 26 },
+  {  9, 11, 18, 23, 27, 32 },
+  { 10, 19, 22, 28, 31, 33 },
+  { 20, 21, 29, 30, 34, 35 }
+};
+
+static const int jpeg_zigzag_order5[5][5] = {	/* 5x5 block (0..24) */
+  {  0,  1,  5,  6, 14 },
+  {  2,  4,  7, 13, 15 },
+  {  3,  8, 12, 16, 21 },
+  {  9, 11, 17, 20, 22 },
+  { 10, 18, 19, 23, 24 }
+};
+
+static const int jpeg_zigzag_order4[4][4] = {	/* 4x4 block (0..15) */
+  { 0,  1,  5,  6 },
+  { 2,  4,  7, 12 },
+  { 3,  8, 11, 13 },
+  { 9, 10, 14, 15 }
+};
+
+static const int jpeg_zigzag_order3[3][3] = {	/* 3x3 block (0..8) */
+  { 0, 1, 5 },
+  { 2, 4, 6 },
+  { 3, 7, 8 }
+};
+
+static const int jpeg_zigzag_order2[2][2] = {	/* 2x2 block (0..3) */
+  { 0, 1 },
+  { 2, 3 }
+};
+
+
+/*
+ * Compute the derived values for a Huffman table.
+ * This routine also performs some validation checks on the table.
+ */
+
+LOCAL(void)
+jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
+			 d_derived_tbl ** pdtbl)
+{
+  JHUFF_TBL *src;		/* public table being expanded */
+  d_derived_tbl *out;		/* derived table being filled in */
+  int pos, cnt, len, cur_len, total;
+  int entry, fill;
+  char lengths[257];		/* code length of each symbol, 0-terminated */
+  unsigned int codes[257];	/* code assigned to each symbol */
+  unsigned int next_code;
+
+  /* lengths[] and codes[] are built in order of increasing code length,
+   * which is also the order of the symbols in src->huffval[].
+   */
+
+  /* Locate the source table: the slot number must be in range and the
+   * slot must actually hold a table.
+   */
+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+  if (isDC)
+    src = cinfo->dc_huff_tbl_ptrs[tblno];
+  else
+    src = cinfo->ac_huff_tbl_ptrs[tblno];
+  if (src == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+
+  /* Reuse the caller's workspace, allocating it on first use. */
+  out = *pdtbl;
+  if (out == NULL) {
+    out = (d_derived_tbl *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(d_derived_tbl));
+    *pdtbl = out;
+  }
+  out->pub = src;		/* back link to the public table */
+
+  /* Figure C.1: expand the per-length counts into one length per symbol */
+  pos = 0;
+  for (len = 1; len <= 16; len++) {
+    cnt = (int) src->bits[len];
+    if (cnt < 0 || pos + cnt > 256)	/* protect against table overrun */
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    for (; cnt > 0; cnt--)
+      lengths[pos++] = (char) len;
+  }
+  lengths[pos] = 0;		/* terminator */
+  total = pos;
+
+  /* Figure C.2: assign the codes, shortest lengths first.
+   * Along the way, verify that the counts describe a legal code tree.
+   */
+
+  next_code = 0;
+  cur_len = lengths[0];
+  for (pos = 0; lengths[pos]; ) {
+    for (; ((int) lengths[pos]) == cur_len; pos++)
+      codes[pos] = next_code++;
+    /* next_code is one past the last code of length cur_len; it still has
+     * to fit in cur_len bits, because an all-ones code is not allowed.
+     */
+    if (((INT32) next_code) >= (((INT32) 1) << cur_len))
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    next_code <<= 1;
+    cur_len++;
+  }
+
+  /* Figure F.15: tables for bit-at-a-time decoding.  For each length that
+   * has codes, valoffset is the huffval[] index of its first symbol minus
+   * its smallest code, and maxcode is its largest code.
+   */
+
+  pos = 0;
+  for (len = 1; len <= 16; len++) {
+    if (src->bits[len] == 0) {
+      out->maxcode[len] = -1;	/* no codes of this length */
+      continue;
+    }
+    out->valoffset[len] = (INT32) pos - (INT32) codes[pos];
+    pos += src->bits[len];
+    out->maxcode[len] = codes[pos-1];
+  }
+  out->maxcode[17] = 0xFFFFFL;	/* sentinel: stops jpeg_huff_decode's loop */
+
+  /* Lookahead tables.  Start with every entry at 0 ("code too long"),
+   * then, for each code of at most HUFF_LOOKAHEAD bits, fill in all the
+   * entries whose index begins with that code.
+   */
+
+  MEMZERO(out->look_nbits, SIZEOF(out->look_nbits));
+
+  pos = 0;
+  for (len = 1; len <= HUFF_LOOKAHEAD; len++) {
+    for (cnt = 1; cnt <= (int) src->bits[len]; cnt++, pos++) {
+      /* pos indexes codes[] and huffval[] for the current code.  Left-justify
+       * the code, then walk over every possible setting of the low bits.
+       */
+      entry = codes[pos] << (HUFF_LOOKAHEAD-len);
+      fill = 1 << (HUFF_LOOKAHEAD-len);
+      while (fill-- > 0) {
+	out->look_nbits[entry] = len;
+	out->look_sym[entry] = src->huffval[pos];
+	entry++;
+      }
+    }
+  }
+
+  /* Symbol sanity check.  AC tables may use any byte value 0..255, so only
+   * DC tables are examined: their symbols must lie in 0..15.
+   */
+  if (! isDC)
+    return;
+  for (cnt = 0; cnt < total; cnt++) {
+    int sym = src->huffval[cnt];
+    if (sym < 0 || sym > 15)
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+  }
+}
+
+
+/*
+ * Out-of-line code for bit fetching.
+ * Note: current values of get_buffer and bits_left are passed as parameters,
+ * but are returned in the corresponding fields of the state struct.
+ *
+ * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
+ * of get_buffer to be used.  (On machines with wider words, an even larger
+ * buffer could be used.)  However, on some machines 32-bit shifts are
+ * quite slow and take time proportional to the number of places shifted.
+ * (This is true with most PC compilers, for instance.)  In this case it may
+ * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
+ * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
+ */
+
+#ifdef SLOW_SHIFT_32
+#define MIN_GET_BITS  15	/* minimum allowable value */
+#else
+#define MIN_GET_BITS  (BIT_BUF_SIZE-7)	/* loading one more byte below this level still fits in BIT_BUF_SIZE bits */
+#endif
+
+
+LOCAL(boolean)
+jpeg_fill_bit_buffer (bitread_working_state * state,
+		      register bit_buf_type get_buffer, register int bits_left,
+		      int nbits)
+/* Load up the bit buffer to a depth of at least nbits; returns FALSE if the data source suspends */
+{
+  /* Copy heavily used state fields into locals (hopefully registers) */
+  register const JOCTET * next_input_byte = state->next_input_byte;
+  register size_t bytes_in_buffer = state->bytes_in_buffer;
+  j_decompress_ptr cinfo = state->cinfo;
+  /* Every FALSE return below happens before anything is written back to *state. */
+  /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
+  /* (It is assumed that no request will be for more than that many bits.) */
+  /* We fail to do so only if we hit a marker or are forced to suspend. */
+
+  if (cinfo->unread_marker == 0) {	/* cannot advance past a marker */
+    while (bits_left < MIN_GET_BITS) {
+      register int c;
+
+      /* Attempt to read a byte */
+      if (bytes_in_buffer == 0) {
+	if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	  return FALSE;
+	next_input_byte = cinfo->src->next_input_byte;
+	bytes_in_buffer = cinfo->src->bytes_in_buffer;
+      }
+      bytes_in_buffer--;
+      c = GETJOCTET(*next_input_byte++);
+
+      /* If it's 0xFF, check and discard stuffed zero byte */
+      if (c == 0xFF) {
+	/* Loop here to discard any padding FF's on terminating marker,
+	 * so that we can save a valid unread_marker value.  NOTE: we will
+	 * accept multiple FF's followed by a 0 as meaning a single FF data
+	 * byte.  This data pattern is not valid according to the standard.
+	 */
+	do {
+	  if (bytes_in_buffer == 0) {
+	    if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	      return FALSE;
+	    next_input_byte = cinfo->src->next_input_byte;
+	    bytes_in_buffer = cinfo->src->bytes_in_buffer;
+	  }
+	  bytes_in_buffer--;
+	  c = GETJOCTET(*next_input_byte++);
+	} while (c == 0xFF);
+
+	if (c == 0) {
+	  /* Found FF/00, which represents an FF data byte */
+	  c = 0xFF;
+	} else {
+	  /* Oops, it's actually a marker indicating end of compressed data.
+	   * Save the marker code for later use.
+	   * Fine point: it might appear that we should save the marker into
+	   * bitread working state, not straight into permanent state.  But
+	   * once we have hit a marker, we cannot need to suspend within the
+	   * current MCU, because we will read no more bytes from the data
+	   * source.  So it is OK to update permanent state right away.
+	   */
+	  cinfo->unread_marker = c;
+	  /* See if we need to insert some fake zero bits. */
+	  goto no_more_bytes;
+	}
+      }
+      /* NOTE(review): bit_buf_type is a signed INT32 and already-consumed bits stay in its high part, so this shift can push them out the top -- confirm that is acceptable for the target compilers. */
+      /* OK, load c into get_buffer: append it below the bits already held */
+      get_buffer = (get_buffer << 8) | c;
+      bits_left += 8;
+    } /* end while */
+  } else {
+  no_more_bytes:
+    /* We get here if we've read the marker that terminates the compressed
+     * data segment.  There should be enough bits in the buffer register
+     * to satisfy the request; if so, no problem.
+     */
+    if (nbits > bits_left) {
+      /* Uh-oh.  Report corrupted data to user and stuff zeroes into
+       * the data stream, so that we can produce some kind of image.
+       * We use a nonvolatile flag to ensure that only one warning message
+       * appears per data segment.
+       */
+      if (! ((huff_entropy_ptr) cinfo->entropy)->insufficient_data) {
+	WARNMS(cinfo, JWRN_HIT_MARKER);
+	((huff_entropy_ptr) cinfo->entropy)->insufficient_data = TRUE;
+      }
+      /* Fill the buffer with zero bits: pad on the right up to MIN_GET_BITS bits in total */
+      get_buffer <<= MIN_GET_BITS - bits_left;
+      bits_left = MIN_GET_BITS;
+    }
+  }
+
+  /* Unload the local registers */
+  state->next_input_byte = next_input_byte;
+  state->bytes_in_buffer = bytes_in_buffer;
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  return TRUE;
+}
+
+
+/*
+ * Figure F.12: extend sign bit.  HUFF_EXTEND(x,s) maps an s-bit value x whose
+ * top bit is clear to x - (2^s - 1) and returns any other x unchanged.
+ * On some machines, a shift and sub will be faster than a table lookup. */
+
+#ifdef AVOID_TABLES
+
+#define BIT_MASK(nbits)   ((1<<(nbits))-1)
+#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) - ((1<<(s))-1) : (x))
+
+#else
+
+#define BIT_MASK(nbits)   bmask[nbits]
+#define HUFF_EXTEND(x,s)  ((x) <= bmask[(s) - 1] ? (x) - bmask[s] : (x))
+/* The table has 16 entries, so the table-based macros are valid for bit counts 0..15 only. */
+static const int bmask[16] =	/* bmask[n] is mask for n rightmost bits */
+  { 0, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF,
+    0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF };
+
+#endif /* AVOID_TABLES */
+
+
+/*
+ * Out-of-line code for Huffman code decoding.
+ */
+
+LOCAL(int)
+jpeg_huff_decode (bitread_working_state * state,
+		  register bit_buf_type get_buffer, register int bits_left,
+		  d_derived_tbl * htbl, int min_bits)
+{
+  register int len;
+  register INT32 accum;
+
+  /* The caller (HUFF_DECODE) already knows the code has at least min_bits
+   * bits, so grab that many in a single fetch.  -1 signals suspension.
+   */
+
+  CHECK_BIT_BUFFER(*state, min_bits, return -1);
+  accum = GET_BITS(min_bits);
+
+  /* Figure F.16 of the JPEG spec: extend the code one bit at a time until
+   * it no longer exceeds the largest code of its length.
+   */
+
+  for (len = min_bits; accum > htbl->maxcode[len]; len++) {
+    CHECK_BIT_BUFFER(*state, 1, return -1);
+    accum = (accum << 1) | GET_BITS(1);
+  }
+
+  /* Hand the bit-buffer registers back through the state struct */
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  /* A valid code is at most 16 bits long.  Garbage input can run us into
+   * the sentinel at length 17; warn and fake a zero as the safest result.
+   */
+  if (len <= 16)
+    return htbl->pub->huffval[ (int) (accum + htbl->valoffset[len]) ];
+
+  WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
+  return 0;
+}
+
+
+/*
+ * Finish up at the end of a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass_huff (j_decompress_ptr cinfo)
+{
+  bitread_perm_state *bits = &((huff_entropy_ptr) cinfo->entropy)->bitstate;
+  int whole_bytes = bits->bits_left / 8;
+
+  /* Empty the bit buffer; its whole bytes count as discarded bytes. */
+  cinfo->marker->discarded_bytes += whole_bytes;
+  bits->bits_left = 0;
+}
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ * Returns FALSE if must suspend.
+ */
+
+LOCAL(boolean)
+process_restart (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr dec = (huff_entropy_ptr) cinfo->entropy;
+  int idx = 0;
+
+  /* Flush the bit buffer, counting leftover whole bytes as discarded */
+  finish_pass_huff(cinfo);
+
+  /* Consume the RSTn marker; a FALSE result means we must suspend */
+  if (! (*cinfo->marker->read_restart_marker) (cinfo))
+    return FALSE;
+
+  /* Start the new interval from scratch: no pending EOB run, and all
+   * DC predictions back at 0. */
+  dec->saved.EOBRUN = 0;
+  while (idx < cinfo->comps_in_scan)
+    dec->saved.last_dc_val[idx++] = 0;
+
+  /* Re-arm the countdown to the next restart marker */
+  dec->restarts_to_go = cinfo->restart_interval;
+
+  /* Clear the out-of-data flag, except when read_restart_marker stopped
+   * right in front of another marker.  The next data segment will then be
+   * treated as empty, and keeping the flag set avoids bogus output pixels.
+   */
+  if (cinfo->unread_marker == 0)
+    dec->insufficient_data = FALSE;
+
+  return TRUE;
+}
+
+
+/*
+ * Huffman MCU decoding.
+ * Each of these routines decodes and returns one MCU's worth of
+ * Huffman-compressed coefficients. 
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
+ * (Wholesale zeroing is usually a little faster than retail...)
+ *
+ * We return FALSE if data source requested suspension.  In that case no
+ * changes have been made to permanent state.  (Exception: some output
+ * coefficients may already have been assigned.  This is harmless for
+ * spectral selection, since we'll just re-assign them on the next call.
+ * Successive approximation AC refinement has to be more careful, however.)
+ */
+
+/*
+ * MCU decoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int Al = cinfo->Al;		/* point transform: left shift applied to each DC value */
+  register int s, r;
+  int blkn, ci;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  savable_state state;
+  d_derived_tbl * tbl;
+  jpeg_component_info * compptr;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->insufficient_data) {
+    /* Load up working state: local copies, so that returning FALSE on
+     * suspension leaves the permanent entropy state unmodified. */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      block = MCU_data[blkn];
+      ci = cinfo->MCU_membership[blkn];
+      compptr = cinfo->cur_comp_info[ci];
+      tbl = entropy->derived_tbls[compptr->dc_tbl_no];
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      HUFF_DECODE(s, br_state, tbl, return FALSE, label1);
+      if (s) {
+	CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	r = GET_BITS(s);
+	s = HUFF_EXTEND(r, s);
+      }
+      /* Convert DC difference to actual value, update last_dc_val */
+      s += state.last_dc_val[ci];
+      state.last_dc_val[ci] = s;
+      /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0).
+       * Shift as unsigned: s can be negative and << would then be undefined. */
+      (*block)[0] = (JCOEF) ((unsigned int) s << Al);
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  register int s, k, r;
+  unsigned int EOBRUN;
+  int Se, Al;
+  const int * natural_order;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  d_derived_tbl * tbl;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->insufficient_data) {
+
+    /* Load up working state.
+     * We can avoid loading/saving bitread state if in an EOB run.
+     */
+    EOBRUN = entropy->saved.EOBRUN;	/* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+
+    if (EOBRUN)			/* if it's a band of zeroes... */
+      EOBRUN--;			/* ...process it now (we do nothing) */
+    else {
+      BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+      Se = cinfo->Se;
+      Al = cinfo->Al;
+      natural_order = cinfo->natural_order;
+      block = MCU_data[0];
+      tbl = entropy->ac_derived_tbl;
+      /* Each decoded symbol: high nibble r = run of zeroes, low nibble s = bit size */
+      for (k = cinfo->Ss; k <= Se; k++) {
+	HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
+	r = s >> 4;
+	s &= 15;
+	if (s) {
+	  k += r;
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  r = GET_BITS(s);
+	  s = HUFF_EXTEND(r, s);
+	  /* Scale (as unsigned: s may be negative) and store in natural order */
+	  (*block)[natural_order[k]] = (JCOEF) ((unsigned int) s << Al);
+	} else {
+	  if (r != 15) {	/* EOBr, run length is 2^r + appended bits */
+	    if (r) {		/* EOBr, r > 0 */
+	      EOBRUN = 1 << r;
+	      CHECK_BIT_BUFFER(br_state, r, return FALSE);
+	      r = GET_BITS(r);
+	      EOBRUN += r;
+	      EOBRUN--;		/* this band is processed at this moment */
+	    }
+	    break;		/* force end-of-band */
+	  }
+	  k += 15;		/* ZRL: skip 15 zeroes in band */
+	}
+      }
+
+      BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    }
+
+    /* Completed MCU, so update state */
+    entropy->saved.EOBRUN = EOBRUN;	/* only part of saved state we need */
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr dec = (huff_entropy_ptr) cinfo->entropy;
+  int bitval, b;
+  BITREAD_STATE_VARS;
+
+  /* At the start of a restart interval, resynchronize first.  A FALSE
+   * result from process_restart means the source wants us to suspend.
+   */
+  if (cinfo->restart_interval && dec->restarts_to_go == 0 &&
+      ! process_restart(cinfo))
+    return FALSE;
+
+  /* insufficient_data is deliberately not tested: if we only read zero
+   * bits, the loop below changes nothing anyway.
+   */
+  bitval = 1 << cinfo->Al;	/* 1 in the bit position being coded */
+
+  /* Work on local copies of the bit-reading state */
+  BITREAD_LOAD_STATE(cinfo,dec->bitstate);
+
+  /* Each block contributes exactly one bit: the next bit of its
+   * two's-complement DC value.  OR-ing it in makes it harmless to redo
+   * a block after a suspension.
+   */
+  b = 0;
+  while (b < cinfo->blocks_in_MCU) {
+    CHECK_BIT_BUFFER(br_state, 1, return FALSE);
+    if (GET_BITS(1))
+      MCU_data[b][0][0] |= bitval;
+    b++;
+  }
+
+  /* Whole MCU decoded: commit the bit-reading state */
+  BITREAD_SAVE_STATE(cinfo,dec->bitstate);
+
+  /* Count this MCU against the restart interval (no-op without restarts) */
+  dec->restarts_to_go--;
+  return TRUE;
+}
+
+
+/* MCU decoding for AC successive approximation refinement scan.
+ * Returns TRUE when the MCU is complete; returns FALSE if forced to suspend,
+ * after re-zeroing every coefficient this call had made newly nonzero. */
+
+METHODDEF(boolean)
+decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{   
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  register int s, k, r;	/* symbol/new value, coef index, zero-run count */
+  unsigned int EOBRUN;
+  int Se, p1, m1;
+  const int * natural_order;
+  JBLOCKROW block;
+  JCOEFPTR thiscoef;
+  BITREAD_STATE_VARS;
+  d_derived_tbl * tbl;
+  int num_newnz;		/* entries currently in use in newnz_pos[] */
+  int newnz_pos[DCTSIZE2];	/* positions made nonzero during this call */
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, don't modify the MCU.
+   * The restart counter is still decremented below. */
+  if (! entropy->insufficient_data) {
+
+    Se = cinfo->Se;
+    p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
+    m1 = -p1;			/* -1 in that bit position; avoids << on a negative */
+    natural_order = cinfo->natural_order;
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+    block = MCU_data[0];
+    tbl = entropy->ac_derived_tbl;
+
+    /* If we are forced to suspend, we must undo the assignments to any newly
+     * nonzero coefficients in the block, because otherwise we'd get confused
+     * next time about which coefficients were already nonzero.
+     * But we need not undo addition of bits to already-nonzero coefficients;
+     * instead, we can test the current bit to see if we already did it.
+     */
+    num_newnz = 0;
+
+    /* initialize coefficient loop counter to start of band */
+    k = cinfo->Ss;
+
+    if (EOBRUN == 0) {
+      do {
+	HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
+	r = s >> 4;		/* high nibble: run of still-zero coefs to skip */
+	s &= 15;		/* low nibble: size of the new coefficient */
+	if (s) {
+	  if (s != 1)		/* size of new coef should always be 1 */
+	    WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
+	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	  if (GET_BITS(1))
+	    s = p1;		/* newly nonzero coef is positive */
+	  else
+	    s = m1;		/* newly nonzero coef is negative */
+	} else {
+	  if (r != 15) {
+	    EOBRUN = 1 << r;	/* EOBr, run length is 2^r + appended bits */
+	    if (r) {
+	      CHECK_BIT_BUFFER(br_state, r, goto undoit);
+	      r = GET_BITS(r);
+	      EOBRUN += r;
+	    }
+	    break;		/* rest of block is handled by EOB logic */
+	  }
+	  /* note s = 0 for processing ZRL */
+	}
+	/* Advance over already-nonzero coefs and r still-zero coefs,
+	 * appending correction bits to the nonzeroes.  A correction bit is 1
+	 * if the absolute value of the coefficient must be increased.
+	 */
+	do {
+	  thiscoef = *block + natural_order[k];
+	  if (*thiscoef) {
+	    CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	    if (GET_BITS(1)) {
+	      if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
+		if (*thiscoef >= 0)
+		  *thiscoef += p1;
+		else
+		  *thiscoef += m1;
+	      }
+	    }
+	  } else {
+	    if (--r < 0)
+	      break;		/* reached target zero coefficient */
+	  }
+	  k++;
+	} while (k <= Se);
+	if (s) {
+	  int pos = natural_order[k];
+	  /* Output newly nonzero coefficient */
+	  (*block)[pos] = (JCOEF) s;
+	  /* Remember its position in case we have to suspend */
+	  newnz_pos[num_newnz++] = pos;
+	}
+	k++;
+      } while (k <= Se);
+    }
+
+    if (EOBRUN) {
+      /* Scan any remaining coefficient positions after the end-of-band
+       * (the last newly nonzero coefficient, if any).  Append a correction
+       * bit to each already-nonzero coefficient.  A correction bit is 1
+       * if the absolute value of the coefficient must be increased.
+       */
+      do {
+	thiscoef = *block + natural_order[k];
+	if (*thiscoef) {
+	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	  if (GET_BITS(1)) {
+	    if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
+	      if (*thiscoef >= 0)
+		*thiscoef += p1;
+	      else
+		*thiscoef += m1;
+	    }
+	  }
+	}
+	k++;
+      } while (k <= Se);
+      /* Count one block completed in EOB run */
+      EOBRUN--;
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+
+undoit:
+  /* Re-zero any output coefficients that we made newly nonzero */
+  while (num_newnz)
+    (*block)[newnz_pos[--num_newnz]] = 0;
+
+  return FALSE;
+}
+
+
+/*
+ * Decode one MCU's worth of Huffman-compressed coefficients, partial blocks
+ * (band limit taken from cinfo->lim_Se).  Returns FALSE if process_restart()
+ * fails or a bit-reading macro takes its failure action; otherwise TRUE. */
+
+METHODDEF(boolean)
+decode_mcu_sub (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
+  int Se, blkn;
+  BITREAD_STATE_VARS;
+  savable_state state;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->insufficient_data) {
+
+    natural_order = cinfo->natural_order;
+    Se = cinfo->lim_Se;	/* upper index bound for the discard loop below */
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      JBLOCKROW block = MCU_data[blkn];
+      d_derived_tbl * htbl;
+      register int s, k, r;
+      int coef_limit, ci;	/* coef_limit: leading coefs to store; 0 = store none */
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      htbl = entropy->dc_cur_tbls[blkn];
+      HUFF_DECODE(s, br_state, htbl, return FALSE, label1);
+
+      htbl = entropy->ac_cur_tbls[blkn];
+      k = 1;			/* index 0 is the DC coefficient handled here */
+      coef_limit = entropy->coef_limit[blkn];
+      if (coef_limit) {
+	/* Convert DC difference to actual value, update last_dc_val */
+	if (s) {
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  r = GET_BITS(s);
+	  s = HUFF_EXTEND(r, s);
+	}
+	ci = cinfo->MCU_membership[blkn];
+	s += state.last_dc_val[ci];
+	state.last_dc_val[ci] = s;
+	/* Output the DC coefficient */
+	(*block)[0] = (JCOEF) s;
+
+	/* Section F.2.2.2: decode the AC coefficients */
+	/* Since zeroes are skipped, output area must be cleared beforehand */
+	for (; k < coef_limit; k++) {
+	  HUFF_DECODE(s, br_state, htbl, return FALSE, label2);
+
+	  r = s >> 4;		/* high nibble: number of coefficients to skip */
+	  s &= 15;		/* low nibble: bit count of the value that follows */
+
+	  if (s) {
+	    k += r;
+	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	    r = GET_BITS(s);
+	    s = HUFF_EXTEND(r, s);
+	    /* Output coefficient in natural (dezigzagged) order.
+	     * Note: the extra entries in natural_order[] will save us
+	     * if k > Se, which could happen if the data is corrupted.
+	     */
+	    (*block)[natural_order[k]] = (JCOEF) s;
+	  } else {
+	    if (r != 15)
+	      goto EndOfBlock;	/* s == 0 with r != 15: nothing more in this block */
+	    k += 15;
+	  }
+	}
+      } else {
+	if (s) {
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  DROP_BITS(s);		/* value not wanted: skip its bits without storing */
+	}
+      }
+
+      /* Section F.2.2.2: decode the AC coefficients */
+      /* In this path we just discard the values */
+      for (; k <= Se; k++) {
+	HUFF_DECODE(s, br_state, htbl, return FALSE, label3);
+
+	r = s >> 4;
+	s &= 15;
+
+	if (s) {
+	  k += r;
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  DROP_BITS(s);
+	} else {
+	  if (r != 15)
+	    break;
+	  k += 15;
+	}
+      }
+
+      EndOfBlock: ;
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * Decode one MCU's worth of Huffman-compressed coefficients, full-size blocks
+ * (indices up to DCTSIZE2-1).  Returns FALSE if process_restart() fails or a
+ * bit-reading macro takes its failure action; otherwise TRUE. */
+
+METHODDEF(boolean)
+decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int blkn;
+  BITREAD_STATE_VARS;
+  savable_state state;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      JBLOCKROW block = MCU_data[blkn];
+      d_derived_tbl * htbl;
+      register int s, k, r;
+      int coef_limit, ci;	/* coef_limit: leading coefs to store; 0 = store none */
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      htbl = entropy->dc_cur_tbls[blkn];
+      HUFF_DECODE(s, br_state, htbl, return FALSE, label1);
+
+      htbl = entropy->ac_cur_tbls[blkn];
+      k = 1;			/* index 0 is the DC coefficient handled here */
+      coef_limit = entropy->coef_limit[blkn];
+      if (coef_limit) {
+	/* Convert DC difference to actual value, update last_dc_val */
+	if (s) {
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  r = GET_BITS(s);
+	  s = HUFF_EXTEND(r, s);
+	}
+	ci = cinfo->MCU_membership[blkn];
+	s += state.last_dc_val[ci];
+	state.last_dc_val[ci] = s;
+	/* Output the DC coefficient */
+	(*block)[0] = (JCOEF) s;
+
+	/* Section F.2.2.2: decode the AC coefficients */
+	/* Since zeroes are skipped, output area must be cleared beforehand */
+	for (; k < coef_limit; k++) {
+	  HUFF_DECODE(s, br_state, htbl, return FALSE, label2);
+
+	  r = s >> 4;		/* high nibble: number of coefficients to skip */
+	  s &= 15;		/* low nibble: bit count of the value that follows */
+
+	  if (s) {
+	    k += r;
+	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	    r = GET_BITS(s);
+	    s = HUFF_EXTEND(r, s);
+	    /* Output coefficient in natural (dezigzagged) order.
+	     * Note: the extra entries in jpeg_natural_order[] will save us
+	     * if k >= DCTSIZE2, which could happen if the data is corrupted.
+	     */
+	    (*block)[jpeg_natural_order[k]] = (JCOEF) s;
+	  } else {
+	    if (r != 15)
+	      goto EndOfBlock;	/* s == 0 with r != 15: nothing more in this block */
+	    k += 15;
+	  }
+	}
+      } else {
+	if (s) {
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  DROP_BITS(s);		/* value not wanted: skip its bits without storing */
+	}
+      }
+
+      /* Section F.2.2.2: decode the AC coefficients */
+      /* In this path we just discard the values */
+      for (; k < DCTSIZE2; k++) {
+	HUFF_DECODE(s, br_state, htbl, return FALSE, label3);
+
+	r = s >> 4;
+	s &= 15;
+
+	if (s) {
+	  k += r;
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  DROP_BITS(s);
+	} else {
+	  if (r != 15)
+	    break;
+	  k += 15;
+	}
+      }
+
+      EndOfBlock: ;
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/* Initialize for a Huffman-compressed scan: check the scan parameters, pick
+ * the decode_mcu routine, build the derived tables the scan needs, zero the
+ * DC predictions, and reset the bit-reader state and restart counter. */
+
+METHODDEF(void)
+start_pass_huff_decoder (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci, blkn, tbl, i;
+  jpeg_component_info * compptr;
+
+  if (cinfo->progressive_mode) {
+    /* Validate progressive scan parameters */
+    if (cinfo->Ss == 0) {
+      if (cinfo->Se != 0)	/* a scan starting at 0 must cover only index 0 */
+	goto bad;
+    } else {
+      /* need not check Ss/Se < 0 since they came from unsigned bytes */
+      if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se)
+	goto bad;
+      /* AC scans may have only one component */
+      if (cinfo->comps_in_scan != 1)
+	goto bad;
+    }
+    if (cinfo->Ah != 0) {
+      /* Successive approximation refinement scan: must have Al = Ah-1. */
+      if (cinfo->Ah-1 != cinfo->Al)
+	goto bad;
+    }
+    if (cinfo->Al > 13) {	/* need not check for < 0 */
+      /* Arguably the maximum Al value should be less than 13 for 8-bit precision,
+       * but the spec doesn't say so, and we try to be liberal about what we
+       * accept.  Note: large Al values could result in out-of-range DC
+       * coefficients during early scans, leading to bizarre displays due to
+       * overflows in the IDCT math.  But we won't crash.
+       */
+      bad:			/* shared target of every failed check above */
+      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+    }
+    /* Update progression status, and verify that scan order is legal.
+     * Note that inter-scan inconsistencies are treated as warnings
+     * not fatal errors ... not clear if this is right way to behave.
+     */
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
+      int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
+      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
+	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
+	int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
+	if (cinfo->Ah != expected)
+	  WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+	coef_bit_ptr[coefi] = cinfo->Al;	/* record Al for this coefficient */
+      }
+    }
+
+    /* Select MCU decoding routine */
+    if (cinfo->Ah == 0) {
+      if (cinfo->Ss == 0)
+	entropy->pub.decode_mcu = decode_mcu_DC_first;
+      else
+	entropy->pub.decode_mcu = decode_mcu_AC_first;
+    } else {
+      if (cinfo->Ss == 0)
+	entropy->pub.decode_mcu = decode_mcu_DC_refine;
+      else
+	entropy->pub.decode_mcu = decode_mcu_AC_refine;
+    }
+
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Make sure requested tables are present, and compute derived tables.
+       * We may build same derived table more than once, but it's not expensive.
+       */
+      if (cinfo->Ss == 0) {
+	if (cinfo->Ah == 0) {	/* DC refinement needs no table */
+	  tbl = compptr->dc_tbl_no;
+	  jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
+				  & entropy->derived_tbls[tbl]);
+	}
+      } else {
+	tbl = compptr->ac_tbl_no;
+	jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
+				& entropy->derived_tbls[tbl]);
+	/* remember the single active table */
+	entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
+      }
+      /* Initialize DC predictions to 0 */
+      entropy->saved.last_dc_val[ci] = 0;
+    }
+
+    /* Initialize private state variables */
+    entropy->saved.EOBRUN = 0;
+  } else {
+    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
+     * This ought to be an error condition, but we make it a warning because
+     * there are some baseline files out there with all zeroes in these bytes.
+     */
+    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
+	((cinfo->is_baseline || cinfo->Se < DCTSIZE2) &&
+	cinfo->Se != cinfo->lim_Se))
+      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
+
+    /* Select MCU decoding routine */
+    /* We retain the hard-coded case for full-size blocks.
+     * This is not necessary, but it appears that this version is slightly
+     * more performant in the given implementation.
+     * With an improved implementation we would prefer a single optimized
+     * function.
+     */
+    if (cinfo->lim_Se != DCTSIZE2-1)
+      entropy->pub.decode_mcu = decode_mcu_sub;
+    else
+      entropy->pub.decode_mcu = decode_mcu;
+
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Compute derived values for Huffman tables */
+      /* We may do this more than once for a table, but it's not expensive */
+      tbl = compptr->dc_tbl_no;
+      jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
+			      & entropy->dc_derived_tbls[tbl]);
+      if (cinfo->lim_Se) {	/* AC needs no table when not present */
+	tbl = compptr->ac_tbl_no;
+	jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
+				& entropy->ac_derived_tbls[tbl]);
+      }
+      /* Initialize DC predictions to 0 */
+      entropy->saved.last_dc_val[ci] = 0;
+    }
+
+    /* Precalculate decoding info for each block in an MCU of this scan */
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      ci = cinfo->MCU_membership[blkn];
+      compptr = cinfo->cur_comp_info[ci];
+      /* Precalculate which table to use for each block */
+      entropy->dc_cur_tbls[blkn] = entropy->dc_derived_tbls[compptr->dc_tbl_no];
+      entropy->ac_cur_tbls[blkn] = entropy->ac_derived_tbls[compptr->ac_tbl_no];
+      /* Decide whether we really care about the coefficient values */
+      if (compptr->component_needed) {
+	ci = compptr->DCT_v_scaled_size;	/* ci reused: vertical scaled size */
+	i = compptr->DCT_h_scaled_size;	/* i: horizontal scaled size */
+	switch (cinfo->lim_Se) {	/* out-of-range sizes fall back to the block size */
+	case (1*1-1):
+	  entropy->coef_limit[blkn] = 1;
+	  break;
+	case (2*2-1):
+	  if (ci <= 0 || ci > 2) ci = 2;
+	  if (i <= 0 || i > 2) i = 2;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order2[ci - 1][i - 1];
+	  break;
+	case (3*3-1):
+	  if (ci <= 0 || ci > 3) ci = 3;
+	  if (i <= 0 || i > 3) i = 3;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order3[ci - 1][i - 1];
+	  break;
+	case (4*4-1):
+	  if (ci <= 0 || ci > 4) ci = 4;
+	  if (i <= 0 || i > 4) i = 4;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order4[ci - 1][i - 1];
+	  break;
+	case (5*5-1):
+	  if (ci <= 0 || ci > 5) ci = 5;
+	  if (i <= 0 || i > 5) i = 5;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order5[ci - 1][i - 1];
+	  break;
+	case (6*6-1):
+	  if (ci <= 0 || ci > 6) ci = 6;
+	  if (i <= 0 || i > 6) i = 6;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order6[ci - 1][i - 1];
+	  break;
+	case (7*7-1):
+	  if (ci <= 0 || ci > 7) ci = 7;
+	  if (i <= 0 || i > 7) i = 7;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order7[ci - 1][i - 1];
+	  break;
+	default:
+	  if (ci <= 0 || ci > 8) ci = 8;
+	  if (i <= 0 || i > 8) i = 8;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order[ci - 1][i - 1];
+	  break;
+	}
+      } else {
+	entropy->coef_limit[blkn] = 0;	/* component not needed: store nothing */
+      }
+    }
+  }
+
+  /* Initialize bitread state variables */
+  entropy->bitstate.bits_left = 0;
+  entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
+  entropy->insufficient_data = FALSE;
+
+  /* Initialize restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/* Module initialization routine for Huffman entropy decoding: allocates the
+ * decoder object, installs start_pass/finish_pass, and NULLs the derived-table
+ * slots; in progressive mode also fills cinfo->coef_bits with -1. */
+
+GLOBAL(void)
+jinit_huff_decoder (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy;
+  int i;
+
+  entropy = (huff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(huff_entropy_decoder));
+  cinfo->entropy = &entropy->pub;
+  entropy->pub.start_pass = start_pass_huff_decoder;
+  entropy->pub.finish_pass = finish_pass_huff;
+
+  if (cinfo->progressive_mode) {
+    /* Create progression status table */
+    int *coef_bit_ptr, ci;
+    cinfo->coef_bits = (int (*)[DCTSIZE2])
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  cinfo->num_components*DCTSIZE2*SIZEOF(int));
+    coef_bit_ptr = & cinfo->coef_bits[0][0];
+    for (ci = 0; ci < cinfo->num_components; ci++)
+      for (i = 0; i < DCTSIZE2; i++)
+	*coef_bit_ptr++ = -1;	/* negative entry: start_pass treats it as Ah 0 */
+
+    /* Mark derived tables unallocated */
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      entropy->derived_tbls[i] = NULL;
+    }
+  } else {
+    /* Mark tables unallocated */
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
+    }
+  }
+}
diff --git a/libraries/jpeg/jdinput.c b/libraries/jpeg/jdinput.c
new file mode 100644
index 000000000..0199553e8
--- /dev/null
+++ b/libraries/jpeg/jdinput.c
@@ -0,0 +1,662 @@
+/*
+ * jdinput.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2002-2013 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains input control logic for the JPEG decompressor.
+ * These routines are concerned with controlling the decompressor's input
+ * processing (marker reading and coefficient decoding).  The actual input
+ * reading is done in jdmarker.c, jdhuff.c, and jdarith.c.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state: the public input-controller struct plus one module-local flag */
+
+typedef struct {
+  struct jpeg_input_controller pub; /* public fields */
+
+  int inheaders;		/* Nonzero until first SOS is reached */
+} my_input_controller;
+
+typedef my_input_controller * my_inputctl_ptr;
+
+
+/* Forward declarations */
+METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Routines to calculate various quantities related to the size of the image.
+ */
+
+
+/*
+ * Compute output image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ */
+
+GLOBAL(void)
+jpeg_core_output_dimensions (j_decompress_ptr cinfo)
+/* Do computations that are needed before master selection phase.
+ * This function is used for transcoding and full decompression.
+ * With IDCT_SCALING_SUPPORTED it also sets min_DCT_*_scaled_size per image and component. */
+{
+#ifdef IDCT_SCALING_SUPPORTED
+  int ci;
+  jpeg_component_info *compptr;
+
+  /* Use the first N in 1..16 with scale_num*block_size <= scale_denom*N (else 16) as the scaled DCT size. */
+  if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom) {
+    /* Provide 1/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 1;
+    cinfo->min_DCT_v_scaled_size = 1;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 2) {
+    /* Provide 2/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 2L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 2L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 2;
+    cinfo->min_DCT_v_scaled_size = 2;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 3) {
+    /* Provide 3/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 3L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 3L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 3;
+    cinfo->min_DCT_v_scaled_size = 3;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 4) {
+    /* Provide 4/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 4L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 4L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 4;
+    cinfo->min_DCT_v_scaled_size = 4;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 5) {
+    /* Provide 5/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 5L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 5L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 5;
+    cinfo->min_DCT_v_scaled_size = 5;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 6) {
+    /* Provide 6/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 6L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 6L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 6;
+    cinfo->min_DCT_v_scaled_size = 6;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 7) {
+    /* Provide 7/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 7L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 7L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 7;
+    cinfo->min_DCT_v_scaled_size = 7;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 8) {
+    /* Provide 8/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 8L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 8L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 8;
+    cinfo->min_DCT_v_scaled_size = 8;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 9) {
+    /* Provide 9/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 9L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 9L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 9;
+    cinfo->min_DCT_v_scaled_size = 9;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 10) {
+    /* Provide 10/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 10L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 10L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 10;
+    cinfo->min_DCT_v_scaled_size = 10;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 11) {
+    /* Provide 11/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 11L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 11L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 11;
+    cinfo->min_DCT_v_scaled_size = 11;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 12) {
+    /* Provide 12/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 12L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 12L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 12;
+    cinfo->min_DCT_v_scaled_size = 12;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 13) {
+    /* Provide 13/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 13L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 13L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 13;
+    cinfo->min_DCT_v_scaled_size = 13;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 14) {
+    /* Provide 14/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 14L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 14L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 14;
+    cinfo->min_DCT_v_scaled_size = 14;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 15) {
+    /* Provide 15/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 15L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 15L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 15;
+    cinfo->min_DCT_v_scaled_size = 15;
+  } else {
+    /* Provide 16/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 16L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 16L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 16;
+    cinfo->min_DCT_v_scaled_size = 16;
+  }
+
+  /* Recompute dimensions of components: each one gets the size chosen above */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size;
+    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size;
+  }
+
+#else /* !IDCT_SCALING_SUPPORTED */
+
+  /* Hardwire it to "no scaling" */
+  cinfo->output_width = cinfo->image_width;
+  cinfo->output_height = cinfo->image_height;
+  /* initial_setup has already initialized DCT_scaled_size,
+   * and has computed unscaled downsampled_width and downsampled_height.
+   */
+
+#endif /* IDCT_SCALING_SUPPORTED */
+}
+
+
+LOCAL(void)
+initial_setup (j_decompress_ptr cinfo)
+/* Called once, when first SOS marker is reached */
+{
+  int ci;
+  jpeg_component_info *compptr;
+
+  /* Make sure image isn't bigger than I can handle */
+  if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
+      (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
+
+  /* Only 8 to 12 bits data precision are supported for DCT based JPEG */
+  if (cinfo->data_precision < 8 || cinfo->data_precision > 12)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Check that number of components won't exceed internal array sizes */
+  if (cinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	     MAX_COMPONENTS);
+
+  /* Compute maximum sampling factors; check factor validity */
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
+	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
+      ERREXIT(cinfo, JERR_BAD_SAMPLING);
+    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
+				   compptr->h_samp_factor);
+    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
+				   compptr->v_samp_factor);
+  }
+
+  /* Derive block_size, natural_order, and lim_Se */
+  if (cinfo->is_baseline || (cinfo->progressive_mode &&
+      cinfo->comps_in_scan)) { /* no pseudo SOS marker */
+    cinfo->block_size = DCTSIZE;
+    cinfo->natural_order = jpeg_natural_order;
+    cinfo->lim_Se = DCTSIZE2-1;
+  } else
+    switch (cinfo->Se) {
+    case (1*1-1):
+      cinfo->block_size = 1;
+      cinfo->natural_order = jpeg_natural_order; /* not needed */
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (2*2-1):
+      cinfo->block_size = 2;
+      cinfo->natural_order = jpeg_natural_order2;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (3*3-1):
+      cinfo->block_size = 3;
+      cinfo->natural_order = jpeg_natural_order3;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (4*4-1):
+      cinfo->block_size = 4;
+      cinfo->natural_order = jpeg_natural_order4;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (5*5-1):
+      cinfo->block_size = 5;
+      cinfo->natural_order = jpeg_natural_order5;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (6*6-1):
+      cinfo->block_size = 6;
+      cinfo->natural_order = jpeg_natural_order6;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (7*7-1):
+      cinfo->block_size = 7;
+      cinfo->natural_order = jpeg_natural_order7;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (8*8-1):
+      cinfo->block_size = 8;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (9*9-1):
+      cinfo->block_size = 9;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (10*10-1):
+      cinfo->block_size = 10;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (11*11-1):
+      cinfo->block_size = 11;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (12*12-1):
+      cinfo->block_size = 12;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (13*13-1):
+      cinfo->block_size = 13;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (14*14-1):
+      cinfo->block_size = 14;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (15*15-1):
+      cinfo->block_size = 15;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (16*16-1):
+      cinfo->block_size = 16;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    default:
+      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+      break;
+    }
+
+  /* We initialize DCT_scaled_size and min_DCT_scaled_size to block_size.
+   * In the full decompressor,
+   * this will be overridden by jpeg_calc_output_dimensions in jdmaster.c;
+   * but in the transcoder,
+   * jpeg_calc_output_dimensions is not used, so we must do it here.
+   */
+  cinfo->min_DCT_h_scaled_size = cinfo->block_size;
+  cinfo->min_DCT_v_scaled_size = cinfo->block_size;
+
+  /* Compute dimensions of components */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    compptr->DCT_h_scaled_size = cinfo->block_size;
+    compptr->DCT_v_scaled_size = cinfo->block_size;
+    /* Size in DCT blocks */
+    compptr->width_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    compptr->height_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+    /* downsampled_width and downsampled_height will also be overridden by
+     * jdmaster.c if we are doing full decompression.  The transcoder library
+     * doesn't use these values, but the calling application might.
+     */
+    /* Size in samples */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) cinfo->max_h_samp_factor);
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) cinfo->max_v_samp_factor);
+    /* Mark component needed, until color conversion says otherwise */
+    compptr->component_needed = TRUE;
+    /* Mark no quantization table yet saved for component */
+    compptr->quant_table = NULL;
+  }
+
+  /* Compute number of fully interleaved MCU rows. */
+  cinfo->total_iMCU_rows = (JDIMENSION)
+    jdiv_round_up((long) cinfo->image_height,
+	          (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+
+  /* Decide whether file contains multiple scans */
+  if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode)
+    cinfo->inputctl->has_multiple_scans = TRUE;
+  else
+    cinfo->inputctl->has_multiple_scans = FALSE;
+}
+
+
+LOCAL(void)
+per_scan_setup (j_decompress_ptr cinfo)
+/* Compute the scan's MCU counts and each scan component's MCU_* and last_* fields. */
+/* cinfo->comps_in_scan and cinfo->cur_comp_info[] were set from SOS marker */
+{
+  int ci, mcublks, tmp;
+  jpeg_component_info *compptr;
+  
+  if (cinfo->comps_in_scan == 1) {
+    
+    /* Noninterleaved (single-component) scan */
+    compptr = cinfo->cur_comp_info[0];
+    
+    /* Overall image size in MCUs (one MCU per block of this component) */
+    cinfo->MCUs_per_row = compptr->width_in_blocks;
+    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
+    
+    /* For noninterleaved scan, always one block per MCU */
+    compptr->MCU_width = 1;
+    compptr->MCU_height = 1;
+    compptr->MCU_blocks = 1;
+    compptr->MCU_sample_width = compptr->DCT_h_scaled_size;
+    compptr->last_col_width = 1;	/* MCU_width is 1, so the last column is one block too */
+    /* For noninterleaved scans, it is convenient to define last_row_height
+     * as the number of block rows present in the last iMCU row.
+     */
+    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+    if (tmp == 0) tmp = compptr->v_samp_factor;	/* exact multiple: last iMCU row is full */
+    compptr->last_row_height = tmp;
+    
+    /* Prepare array describing MCU composition */
+    cinfo->blocks_in_MCU = 1;
+    cinfo->MCU_membership[0] = 0;	/* the single block belongs to cur_comp_info[0] */
+    
+  } else {
+    
+    /* Interleaved (multi-component) scan; comps_in_scan == 1 was handled above */
+    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
+	       MAX_COMPS_IN_SCAN);
+    
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width,
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    cinfo->MCU_rows_in_scan = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height,
+		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+    
+    cinfo->blocks_in_MCU = 0;	/* accumulated over the components below */
+    
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Sampling factors give # of blocks of component in each MCU */
+      compptr->MCU_width = compptr->h_samp_factor;
+      compptr->MCU_height = compptr->v_samp_factor;
+      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
+      compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size;
+      /* Figure number of non-dummy blocks in last MCU column & row */
+      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
+      if (tmp == 0) tmp = compptr->MCU_width;	/* exact multiple: last MCU column is full */
+      compptr->last_col_width = tmp;
+      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
+      if (tmp == 0) tmp = compptr->MCU_height;	/* exact multiple: last MCU row is full */
+      compptr->last_row_height = tmp;
+      /* Prepare array describing MCU composition */
+      mcublks = compptr->MCU_blocks;
+      if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU) /* checked before the fill loop appends */
+	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+      while (mcublks-- > 0) {
+	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci; /* one entry per block: index into cur_comp_info[] */
+      }
+    }
+    
+  }
+}
+
+
+/*
+ * Save away a copy of the Q-table referenced by each component present
+ * in the current scan, unless already saved during a prior scan.
+ *
+ * In a multiple-scan JPEG file, the encoder could assign different components
+ * the same Q-table slot number, but change table definitions between scans
+ * so that each component uses a different Q-table.  (The IJG encoder is not
+ * currently capable of doing this, but other encoders might.)  Since we want
+ * to be able to dequantize all the components at the end of the file, this
+ * means that we have to save away the table actually used for each component.
+ * We do this by copying the table at the start of the first scan containing
+ * the component.
+ * The JPEG spec prohibits the encoder from changing the contents of a Q-table
+ * slot between scans of a component using that slot.  If the encoder does so
+ * anyway, this decoder will simply use the Q-table values that were current
+ * at the start of the first scan for the component.
+ *
+ * The decompressor output side looks only at the saved quant tables,
+ * not at the current Q-table slots.
+ */
+
+LOCAL(void)
+latch_quant_tables (j_decompress_ptr cinfo)
+{
+  int ci, qtblno;
+  jpeg_component_info *compptr;
+  JQUANT_TBL * qtbl;
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* No work if we already saved Q-table for this component */
+    if (compptr->quant_table != NULL)
+      continue;
+    /* Make sure specified quantization table is present (slot index in range and slot filled) */
+    qtblno = compptr->quant_tbl_no;
+    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
+	cinfo->quant_tbl_ptrs[qtblno] == NULL)
+      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
+    /* OK, save away the quantization table: private copy allocated from JPOOL_IMAGE */
+    qtbl = (JQUANT_TBL *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(JQUANT_TBL));
+    MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], SIZEOF(JQUANT_TBL)); /* copy by value, not by pointer */
+    compptr->quant_table = qtbl;	/* non-NULL from now on, so later scans skip this component */
+  }
+}
+
+
+/*
+ * Initialize the input modules to read a scan of compressed data.
+ * The first call to this is done by jdmaster.c after initializing
+ * the entire decompressor (during jpeg_start_decompress).
+ * Subsequent calls come from consume_markers, below.
+ */
+
+METHODDEF(void)
+start_input_pass (j_decompress_ptr cinfo)
+{
+  per_scan_setup(cinfo);	/* MCU layout for this scan */
+  latch_quant_tables(cinfo);	/* save Q-tables of components not yet latched */
+  (*cinfo->entropy->start_pass) (cinfo);
+  (*cinfo->coef->start_input_pass) (cinfo);
+  cinfo->inputctl->consume_input = cinfo->coef->consume_data; /* finish_input_pass switches this back */
+}
+
+
+/*
+ * Finish up after inputting a compressed-data scan.
+ * This is called by the coefficient controller after it's read all
+ * the expected data of the scan.
+ */
+
+METHODDEF(void)
+finish_input_pass (j_decompress_ptr cinfo)
+{
+  (*cinfo->entropy->finish_pass) (cinfo);
+  cinfo->inputctl->consume_input = consume_markers; /* back to reading markers (undoes start_input_pass) */
+}
+
+
+/*
+ * Read JPEG markers before, between, or after compressed-data scans.
+ * Change state as necessary when a new scan is reached.
+ * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ *
+ * The consume_input method pointer points either here or to the
+ * coefficient controller's consume_data routine, depending on whether
+ * we are reading a compressed data segment or inter-segment markers.
+ *
+ * Note: This function should NOT return a pseudo SOS marker (with zero
+ * component number) to the caller.  A pseudo marker received by
+ * read_markers is processed and then skipped for other markers.
+ */
+
+METHODDEF(int)
+consume_markers (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
+  int val;
+
+  if (inputctl->pub.eoi_reached) /* After hitting EOI, read no further */
+    return JPEG_REACHED_EOI;
+
+  for (;;) {			/* Loop to pass pseudo SOS marker */
+    val = (*cinfo->marker->read_markers) (cinfo);
+
+    switch (val) {
+    case JPEG_REACHED_SOS:	/* Found SOS */
+      if (inputctl->inheaders) { /* 1st SOS */
+	if (inputctl->inheaders == 1) /* 1 = initial_setup not yet run; 2 = already run */
+	  initial_setup(cinfo);
+	if (cinfo->comps_in_scan == 0) { /* pseudo SOS marker */
+	  inputctl->inheaders = 2;
+	  break;		/* leaves the switch only; the for loop reads on */
+	}
+	inputctl->inheaders = 0;	/* header phase is over */
+	/* Note: start_input_pass must be called by jdmaster.c
+	 * before any more input can be consumed.  jdapimin.c is
+	 * responsible for enforcing this sequencing.
+	 */
+      } else {			/* 2nd or later SOS marker */
+	if (! inputctl->pub.has_multiple_scans)
+	  ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */
+	if (cinfo->comps_in_scan == 0) /* unexpected pseudo SOS marker */
+	  break;		/* skipped: the for loop reads the next marker */
+	start_input_pass(cinfo);
+      }
+      return val;
+    case JPEG_REACHED_EOI:	/* Found EOI */
+      inputctl->pub.eoi_reached = TRUE;
+      if (inputctl->inheaders) { /* Tables-only datastream, apparently */
+	if (cinfo->marker->saw_SOF)
+	  ERREXIT(cinfo, JERR_SOF_NO_SOS);
+      } else {
+	/* Prevent infinite loop in coef ctlr's decompress_data routine
+	 * if user set output_scan_number larger than number of scans.
+	 */
+	if (cinfo->output_scan_number > cinfo->input_scan_number)
+	  cinfo->output_scan_number = cinfo->input_scan_number;
+      }
+      return val;
+    case JPEG_SUSPENDED:
+      return val;
+    default:			/* any other read_markers result is passed through unchanged */
+      return val;
+    }
+  }
+}
+
+
+/*
+ * Reset state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_input_controller (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
+
+  inputctl->pub.consume_input = consume_markers; /* start out reading markers */
+  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
+  inputctl->pub.eoi_reached = FALSE;
+  inputctl->inheaders = 1;	/* nonzero: first SOS not yet processed (see consume_markers) */
+  /* Reset other modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->marker->reset_marker_reader) (cinfo);
+  /* Reset progression state -- would be cleaner if entropy decoder did this */
+  cinfo->coef_bits = NULL;
+}
+
+
+/*
+ * Initialize the input controller module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_input_controller (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl;
+
+  /* Create subobject in permanent pool */
+  inputctl = (my_inputctl_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				SIZEOF(my_input_controller));
+  cinfo->inputctl = &inputctl->pub;	/* cinfo holds a pointer to the public part */
+  /* Initialize method pointers */
+  inputctl->pub.consume_input = consume_markers;
+  inputctl->pub.reset_input_controller = reset_input_controller;
+  inputctl->pub.start_input_pass = start_input_pass;
+  inputctl->pub.finish_input_pass = finish_input_pass;
+  /* Initialize state: can't use reset_input_controller since we don't
+   * want to try to reset other modules yet.  Same three values as set there.
+   */
+  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
+  inputctl->pub.eoi_reached = FALSE;
+  inputctl->inheaders = 1;	/* nonzero: first SOS not yet processed */
+}
diff --git a/libraries/jpeg/jdmainct.c b/libraries/jpeg/jdmainct.c
new file mode 100644
index 000000000..4d738fbae
--- /dev/null
+++ b/libraries/jpeg/jdmainct.c
@@ -0,0 +1,507 @@
+/*
+ * jdmainct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2002-2016 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the main buffer controller for decompression.
+ * The main buffer lies between the JPEG decompressor proper and the
+ * post-processor; it holds downsampled data in the JPEG colorspace.
+ *
+ * Note that this code is bypassed in raw-data mode, since the application
+ * supplies the equivalent of the main buffer in that case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * In the current system design, the main buffer need never be a full-image
+ * buffer; any full-height buffers will be found inside the coefficient or
+ * postprocessing controllers.  Nonetheless, the main controller is not
+ * trivial.  Its responsibility is to provide context rows for upsampling/
+ * rescaling, and doing this in an efficient fashion is a bit tricky.
+ *
+ * Postprocessor input data is counted in "row groups".  A row group is
+ * defined to be (v_samp_factor * DCT_v_scaled_size / min_DCT_v_scaled_size)
+ * sample rows of each component.  (We require DCT_scaled_size values to be
+ * chosen such that these numbers are integers.  In practice DCT_scaled_size
+ * values will likely be powers of two, so we actually have the stronger
+ * condition that DCT_scaled_size / min_DCT_scaled_size is an integer.)
+ * Upsampling will typically produce max_v_samp_factor pixel rows from each
+ * row group (times any additional scale factor that the upsampler is
+ * applying).
+ *
+ * The coefficient controller will deliver data to us one iMCU row at a time;
+ * each iMCU row contains v_samp_factor * DCT_v_scaled_size sample rows, or
+ * exactly min_DCT_v_scaled_size row groups.  (This amount of data corresponds
+ * to one row of MCUs when the image is fully interleaved.)  Note that the
+ * number of sample rows varies across components, but the number of row
+ * groups does not.  Some garbage sample rows may be included in the last iMCU
+ * row at the bottom of the image.
+ *
+ * Depending on the vertical scaling algorithm used, the upsampler may need
+ * access to the sample row(s) above and below its current input row group.
+ * The upsampler is required to set need_context_rows TRUE at global selection
+ * time if so.  When need_context_rows is FALSE, this controller can simply
+ * obtain one iMCU row at a time from the coefficient controller and dole it
+ * out as row groups to the postprocessor.
+ *
+ * When need_context_rows is TRUE, this controller guarantees that the buffer
+ * passed to postprocessing contains at least one row group's worth of samples
+ * above and below the row group(s) being processed.  Note that the context
+ * rows "above" the first passed row group appear at negative row offsets in
+ * the passed buffer.  At the top and bottom of the image, the required
+ * context rows are manufactured by duplicating the first or last real sample
+ * row; this avoids having special cases in the upsampling inner loops.
+ *
+ * The amount of context is fixed at one row group just because that's a
+ * convenient number for this controller to work with.  The existing
+ * upsamplers really only need one sample row of context.  An upsampler
+ * supporting arbitrary output rescaling might wish for more than one row
+ * group of context when shrinking the image; tough, we don't handle that.
+ * (This is justified by the assumption that downsizing will be handled mostly
+ * by adjusting the DCT_scaled_size values, so that the actual scale factor at
+ * the upsample step needn't be much less than one.)
+ *
+ * To provide the desired context, we have to retain the last two row groups
+ * of one iMCU row while reading in the next iMCU row.  (The last row group
+ * can't be processed until we have another row group for its below-context,
+ * and so we have to save the next-to-last group too for its above-context.)
+ * We could do this most simply by copying data around in our buffer, but
+ * that'd be very slow.  We can avoid copying any data by creating a rather
+ * strange pointer structure.  Here's how it works.  We allocate a workspace
+ * consisting of M+2 row groups (where M = min_DCT_v_scaled_size is the number
+ * of row groups per iMCU row).  We create two sets of redundant pointers to
+ * the workspace.  Labeling the physical row groups 0 to M+1, the synthesized
+ * pointer lists look like this:
+ *                   M+1                          M-1
+ * master pointer --> 0         master pointer --> 0
+ *                    1                            1
+ *                   ...                          ...
+ *                   M-3                          M-3
+ *                   M-2                           M
+ *                   M-1                          M+1
+ *                    M                           M-2
+ *                   M+1                          M-1
+ *                    0                            0
+ * We read alternate iMCU rows using each master pointer; thus the last two
+ * row groups of the previous iMCU row remain un-overwritten in the workspace.
+ * The pointer lists are set up so that the required context rows appear to
+ * be adjacent to the proper places when we pass the pointer lists to the
+ * upsampler.
+ *
+ * The above pictures describe the normal state of the pointer lists.
+ * At top and bottom of the image, we diddle the pointer lists to duplicate
+ * the first or last sample row as necessary (this is cheaper than copying
+ * sample rows around).
+ *
+ * This scheme breaks down if M < 2, ie, min_DCT_v_scaled_size is 1.  In that
+ * situation each iMCU row provides only one row group so the buffering logic
+ * must be different (eg, we must read two iMCU rows before we can emit the
+ * first row group).  For now, we simply do not support providing context
+ * rows when min_DCT_v_scaled_size is 1.  That combination seems unlikely to
+ * be worth providing --- if someone wants a 1/8th-size preview, they probably
+ * want it quick and dirty, so a context-free upsampler is sufficient.
+ */
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_main_controller pub; /* public fields */
+
+  /* Per-component workspace (M or M+2 row groups), allocated in jinit_d_main_controller. */
+  JSAMPARRAY buffer[MAX_COMPONENTS];
+
+  JDIMENSION rowgroup_ctr;	/* counts row groups output to postprocessor */
+  JDIMENSION rowgroups_avail;	/* row groups available to postprocessor */
+
+  /* Remaining fields are only used in the context case. */
+
+  boolean buffer_full;		/* Have we gotten an iMCU row from decoder? */
+
+  /* These are the master pointers to the funny-order pointer lists. */
+  JSAMPIMAGE xbuffer[2];	/* pointers to weird pointer lists */
+
+  int whichptr;			/* index (0 or 1) of the xbuffer[] list now in use */
+  int context_state;		/* process_data state machine status: a CTX_* value */
+  JDIMENSION iMCU_row_ctr;	/* counts iMCU rows to detect image top/bot */
+} my_main_controller;
+
+typedef my_main_controller * my_main_ptr;
+
+/* context_state values (states of process_data_context_main): */
+#define CTX_PREPARE_FOR_IMCU	0	/* need to prepare for iMCU row */
+#define CTX_PROCESS_IMCU	1	/* feeding iMCU to postprocessor */
+#define CTX_POSTPONED_ROW	2	/* feeding postponed row group */
+
+
+/* Forward declarations of the process_data methods installed by start_pass_main */
+METHODDEF(void) process_data_simple_main
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+METHODDEF(void) process_data_context_main
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+#ifdef QUANT_2PASS_SUPPORTED
+METHODDEF(void) process_data_crank_post
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+#endif
+
+
+LOCAL(void)
+alloc_funny_pointers (j_decompress_ptr cinfo)
+/* Allocate space for the funny pointer lists (mainp->xbuffer[0] and [1]).
+ * Done only once, not once per pass; make_funny_pointers fills the lists.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci, rgroup;
+  int M = cinfo->min_DCT_v_scaled_size;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf;
+
+  /* Get top-level space for component array pointers.
+   * We alloc both arrays with one call to save a few cycles.
+   */
+  mainp->xbuffer[0] = (JSAMPIMAGE)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				cinfo->num_components * 2 * SIZEOF(JSAMPARRAY));
+  mainp->xbuffer[1] = mainp->xbuffer[0] + cinfo->num_components; /* second half of the same allocation */
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
+    /* Get space for pointer lists --- M+4 row groups in each list: one group
+     * at negative offsets, M+2 for the workspace, one wraparound group after.
+     * We alloc both pointer lists with one call to save a few cycles. */
+    xbuf = (JSAMPARRAY)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW));
+    xbuf += rgroup;		/* want one row group at negative offsets */
+    mainp->xbuffer[0][ci] = xbuf;
+    xbuf += rgroup * (M + 4);	/* advance by one whole list to reach the second one */
+    mainp->xbuffer[1][ci] = xbuf;
+  }
+}
+
+
+LOCAL(void)
+make_funny_pointers (j_decompress_ptr cinfo)
+/* Create the funny pointer lists discussed in the comments above.
+ * The actual workspace is already allocated (in mainp->buffer),
+ * and the space for the pointer lists is allocated too.
+ * This routine just fills in the curiously ordered lists.
+ * This will be repeated at the beginning of each pass.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup;
+  int M = cinfo->min_DCT_v_scaled_size;
+  jpeg_component_info *compptr;
+  JSAMPARRAY buf, xbuf0, xbuf1;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
+    xbuf0 = mainp->xbuffer[0][ci];
+    xbuf1 = mainp->xbuffer[1][ci];
+    /* First copy the workspace pointers as-is (all M+2 row groups, into both lists) */
+    buf = mainp->buffer[ci];
+    for (i = 0; i < rgroup * (M + 2); i++) {
+      xbuf0[i] = xbuf1[i] = buf[i];
+    }
+    /* In the second list, put the last four row groups in swapped order (M-2,M-1 <-> M,M+1) */
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf1[rgroup*(M-2) + i] = buf[rgroup*M + i];
+      xbuf1[rgroup*M + i] = buf[rgroup*(M-2) + i];
+    }
+    /* The wraparound pointers at top and bottom will be filled later
+     * (see set_wraparound_pointers, below).  Initially we want the "above"
+     * pointers to duplicate the first actual data line.  This only needs
+     * to happen in xbuffer[0].
+     */
+    for (i = 0; i < rgroup; i++) {
+      xbuf0[i - rgroup] = xbuf0[0];	/* indices -rgroup..-1, reserved by alloc_funny_pointers */
+    }
+  }
+}
+
+
+LOCAL(void)
+set_wraparound_pointers (j_decompress_ptr cinfo)
+/* Set up the "wraparound" pointers at top and bottom of the pointer lists.
+ * This changes the pointer list state from top-of-image to the normal state.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup;
+  int M = cinfo->min_DCT_v_scaled_size;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf0, xbuf1;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
+    xbuf0 = mainp->xbuffer[0][ci];
+    xbuf1 = mainp->xbuffer[1][ci];
+    for (i = 0; i < rgroup; i++) {
+      xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i];	/* entries above the list = list's row group M+1 */
+      xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i];
+      xbuf0[rgroup*(M+2) + i] = xbuf0[i];		/* entries after the list = list's row group 0 */
+      xbuf1[rgroup*(M+2) + i] = xbuf1[i];
+    }
+  }
+}
+
+
+LOCAL(void)
+set_bottom_pointers (j_decompress_ptr cinfo)
+/* Change the pointer lists to duplicate the last sample row at the bottom
+ * of the image.  whichptr indicates which xbuffer holds the final iMCU row.
+ * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup, iMCUheight, rows_left;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Count sample rows in one iMCU row and in one row group */
+    iMCUheight = compptr->v_samp_factor * compptr->DCT_v_scaled_size;
+    rgroup = iMCUheight / cinfo->min_DCT_v_scaled_size;
+    /* Count nondummy sample rows remaining for this component */
+    rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight);
+    if (rows_left == 0) rows_left = iMCUheight;	/* exact multiple: the whole last iMCU row is real */
+    /* Count nondummy row groups.  Should get same answer for each component,
+     * so we need only do it once.
+     */
+    if (ci == 0) {
+      mainp->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1); /* rows_left / rgroup, rounded up */
+    }
+    /* Duplicate the last real sample row rgroup*2 times; this pads out the
+     * last partial rowgroup and ensures at least one full rowgroup of context.
+     */
+    xbuf = mainp->xbuffer[mainp->whichptr][ci];
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf[rows_left + i] = xbuf[rows_left-1];	/* only pointers are rewritten; no samples are copied */
+    }
+  }
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  switch (pass_mode) {		/* pick the process_data method and reset its state */
+  case JBUF_PASS_THRU:
+    if (cinfo->upsample->need_context_rows) {
+      mainp->pub.process_data = process_data_context_main;
+      make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
+      mainp->whichptr = 0;	/* Read first iMCU row into xbuffer[0] */
+      mainp->context_state = CTX_PREPARE_FOR_IMCU;
+      mainp->iMCU_row_ctr = 0;	/* no iMCU rows received yet */
+      mainp->buffer_full = FALSE; /* Mark buffer empty */
+    } else {
+      /* Simple case with no context needed */
+      mainp->pub.process_data = process_data_simple_main;
+      mainp->rowgroup_ctr = mainp->rowgroups_avail; /* Mark buffer empty */
+    }
+    break;
+#ifdef QUANT_2PASS_SUPPORTED
+  case JBUF_CRANK_DEST:
+    /* For last pass of 2-pass quantization, just crank the postprocessor */
+    mainp->pub.process_data = process_data_crank_post;
+    break;
+#endif
+  default:			/* every other buffer mode is rejected */
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This handles the simple case where no context is required.
+ */
+
+METHODDEF(void)
+process_data_simple_main (j_decompress_ptr cinfo,
+			  JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			  JDIMENSION out_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  /* Read input data if we haven't filled the main buffer yet (or it is used up) */
+  if (mainp->rowgroup_ctr >= mainp->rowgroups_avail) {
+    if (! (*cinfo->coef->decompress_data) (cinfo, mainp->buffer))
+      return;			/* suspension forced, can do nothing more */
+    mainp->rowgroup_ctr = 0;	/* OK, we have an iMCU row to work with */
+  }
+
+  /* Note: at the bottom of the image, we may pass extra garbage row groups
+   * to the postprocessor.  The postprocessor has to check for bottom
+   * of image anyway (at row resolution), so no point in us doing it too.
+   */
+
+  /* Feed the postprocessor; rowgroup_ctr is passed by address (presumably advanced there -- confirm) */
+  (*cinfo->post->post_process_data) (cinfo, mainp->buffer,
+			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+}
+
+
+/*
+ * Process some data.
+ * This handles the case where context rows must be provided.
+ */
+
+METHODDEF(void)
+process_data_context_main (j_decompress_ptr cinfo,
+			   JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			   JDIMENSION out_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  /* Read input data if we haven't filled the main buffer yet */
+  if (! mainp->buffer_full) {
+    if (! (*cinfo->coef->decompress_data) (cinfo,
+					   mainp->xbuffer[mainp->whichptr]))
+      return;			/* suspension forced, can do nothing more */
+    mainp->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
+    mainp->iMCU_row_ctr++;	/* count rows received */
+  }
+
+  /* Postprocessor typically will not swallow all the input data it is handed
+   * in one call (due to filling the output buffer first).  Must be prepared
+   * to exit and restart.  This switch lets us keep track of how far we got.
+   * Note that each case falls through to the next on successful completion.
+   */
+  switch (mainp->context_state) {
+  case CTX_POSTPONED_ROW:
+    /* Call postprocessor using previously set pointers for postponed row */
+    (*cinfo->post->post_process_data) (cinfo, mainp->xbuffer[mainp->whichptr],
+			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+    if (mainp->rowgroup_ctr < mainp->rowgroups_avail)
+      return;			/* Need to suspend */
+    mainp->context_state = CTX_PREPARE_FOR_IMCU;
+    if (*out_row_ctr >= out_rows_avail)
+      return;			/* Postprocessor exactly filled output buf */
+    /*FALLTHROUGH*/
+  case CTX_PREPARE_FOR_IMCU:
+    /* Prepare to process first M-1 row groups of this iMCU row */
+    mainp->rowgroup_ctr = 0;
+    mainp->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size - 1);
+    /* Check for bottom of image: if so, tweak pointers to "duplicate"
+     * the last sample row, and adjust rowgroups_avail to ignore padding rows.
+     */
+    if (mainp->iMCU_row_ctr == cinfo->total_iMCU_rows) /* row just received is the last one */
+      set_bottom_pointers(cinfo);
+    mainp->context_state = CTX_PROCESS_IMCU;
+    /*FALLTHROUGH*/
+  case CTX_PROCESS_IMCU:
+    /* Call postprocessor using previously set pointers */
+    (*cinfo->post->post_process_data) (cinfo, mainp->xbuffer[mainp->whichptr],
+			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+    if (mainp->rowgroup_ctr < mainp->rowgroups_avail)
+      return;			/* Need to suspend */
+    /* After the first iMCU, change wraparound pointers to normal state */
+    if (mainp->iMCU_row_ctr == 1)
+      set_wraparound_pointers(cinfo);
+    /* Prepare to load new iMCU row using other xbuffer list */
+    mainp->whichptr ^= 1;	/* 0=>1 or 1=>0 */
+    mainp->buffer_full = FALSE;	/* next call reads a new iMCU row first */
+    /* Still need to process last row group of this iMCU row, */
+    /* which is saved at index M+1 of the other xbuffer */
+    mainp->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 1);
+    mainp->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 2);
+    mainp->context_state = CTX_POSTPONED_ROW;
+  }				/* no default: only the three CTX_* values are ever stored */
+}
+
+
+/*
+ * Process some data.
+ * Final pass of two-pass quantization: just call the postprocessor.
+ * Source data will be the postprocessor controller's internal buffer.
+ */
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+METHODDEF(void)
+process_data_crank_post (j_decompress_ptr cinfo,
+			 JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			 JDIMENSION out_rows_avail)
+{
+  (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE) NULL, /* no input buffer is supplied */
+				     (JDIMENSION *) NULL, (JDIMENSION) 0, /* nor any row-group counters */
+				     output_buf, out_row_ctr, out_rows_avail);
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize main buffer controller.
+ */
+
+GLOBAL(void)
+jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_main_ptr mainp;
+  int ci, rgroup, ngroups;
+  jpeg_component_info *compptr;
+
+  mainp = (my_main_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_main_controller));
+  cinfo->main = &mainp->pub;
+  mainp->pub.start_pass = start_pass_main;	/* process_data is chosen later, in start_pass_main */
+
+  if (need_full_buffer)		/* shouldn't happen: a full-image buffer is rejected here */
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Allocate the workspace.
+   * ngroups is the number of row groups we need.
+   */
+  if (cinfo->upsample->need_context_rows) {
+    if (cinfo->min_DCT_v_scaled_size < 2) /* unsupported, see comments above */
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */
+    ngroups = cinfo->min_DCT_v_scaled_size + 2;	/* the M+2 row groups the pointer lists refer to */
+  } else {
+    /* There are always min_DCT_v_scaled_size row groups in an iMCU row. */
+    ngroups = cinfo->min_DCT_v_scaled_size;
+    mainp->rowgroups_avail = (JDIMENSION) ngroups; /* set once here; the context case sets it per call */
+  }
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
+    mainp->buffer[ci] = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       compptr->width_in_blocks * ((JDIMENSION) compptr->DCT_h_scaled_size),
+       (JDIMENSION) (rgroup * ngroups));	/* ngroups row groups of rgroup rows each */
+  }
+}
diff --git a/libraries/jpeg/jdmarker.c b/libraries/jpeg/jdmarker.c
new file mode 100644
index 000000000..3fbe5c165
--- /dev/null
+++ b/libraries/jpeg/jdmarker.c
@@ -0,0 +1,1511 @@
+/*
+ * jdmarker.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2009-2013 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to decode JPEG datastream markers.
+ * Most of the complexity arises from our desire to support input
+ * suspension: if not all of the data for a marker is available,
+ * we must exit back to the application.  On resumption, we reprocess
+ * the marker.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+typedef enum {			/* JPEG marker codes, listed by value */
+  M_TEM   = 0x01,
+
+  /* 0xC0-0xCF: SOFn frame headers, with DHT, JPG and DAC interleaved */
+  M_SOF0  = 0xC0,
+  M_SOF1  = 0xC1,
+  M_SOF2  = 0xC2,
+  M_SOF3  = 0xC3,
+  M_DHT   = 0xC4,
+  M_SOF5  = 0xC5,
+  M_SOF6  = 0xC6,
+  M_SOF7  = 0xC7,
+
+  M_JPG   = 0xC8,
+  M_SOF9  = 0xC9,
+  M_SOF10 = 0xCA,
+  M_SOF11 = 0xCB,
+  M_DAC   = 0xCC,
+  M_SOF13 = 0xCD,
+  M_SOF14 = 0xCE,
+  M_SOF15 = 0xCF,
+
+  /* 0xD0-0xD7: restart markers */
+  M_RST0  = 0xD0,
+  M_RST1  = 0xD1,
+  M_RST2  = 0xD2,
+  M_RST3  = 0xD3,
+  M_RST4  = 0xD4,
+  M_RST5  = 0xD5,
+  M_RST6  = 0xD6,
+  M_RST7  = 0xD7,
+
+  M_SOI   = 0xD8,
+  M_EOI   = 0xD9,
+  M_SOS   = 0xDA,
+  M_DQT   = 0xDB,
+  M_DNL   = 0xDC,
+  M_DRI   = 0xDD,
+  M_DHP   = 0xDE,
+  M_EXP   = 0xDF,
+
+  /* 0xE0-0xEF: application segments */
+  M_APP0  = 0xE0,
+  M_APP1  = 0xE1,
+  M_APP2  = 0xE2,
+  M_APP3  = 0xE3,
+  M_APP4  = 0xE4,
+  M_APP5  = 0xE5,
+  M_APP6  = 0xE6,
+  M_APP7  = 0xE7,
+  M_APP8  = 0xE8,
+  M_APP9  = 0xE9,
+  M_APP10 = 0xEA,
+  M_APP11 = 0xEB,
+  M_APP12 = 0xEC,
+  M_APP13 = 0xED,
+  M_APP14 = 0xEE,
+  M_APP15 = 0xEF,
+
+  M_JPG0  = 0xF0,
+  M_JPG8  = 0xF8,
+  M_JPG13 = 0xFD,
+  M_COM   = 0xFE,
+
+  /* larger than any one-byte marker code */
+  M_ERROR = 0x100
+} JPEG_MARKER;
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_marker_reader pub; /* public fields (first member; save_marker casts cinfo->marker to this struct) */
+
+  /* Application-overridable marker processing methods */
+  jpeg_marker_parser_method process_COM;
+  jpeg_marker_parser_method process_APPn[16];
+
+  /* Upper bound on the data bytes save_marker keeps, per marker type */
+  unsigned int length_limit_COM;
+  unsigned int length_limit_APPn[16];	/* indexed by marker code - M_APP0 */
+
+  /* Status of COM/APPn marker saving; lets save_marker resume after a suspension */
+  jpeg_saved_marker_ptr cur_marker;	/* NULL if not processing a marker */
+  unsigned int bytes_read;		/* data bytes read so far in marker */
+  /* Note: cur_marker is not linked into marker_list until it's all read. */
+} my_marker_reader;
+
+typedef my_marker_reader * my_marker_ptr;
+
+
+/*
+ * Macros for fetching data from the data source module.
+ *
+ * At all times, cinfo->src->next_input_byte and ->bytes_in_buffer reflect
+ * the current restart point; we update them only when we have reached a
+ * suitable place to restart if a suspension occurs.
+ */
+
+/* Declare locals datasrc, next_input_byte, bytes_in_buffer from cinfo->src */
+#define INPUT_VARS(cinfo)  \
+	struct jpeg_source_mgr * datasrc = (cinfo)->src;  \
+	const JOCTET * next_input_byte = datasrc->next_input_byte;  \
+	size_t bytes_in_buffer = datasrc->bytes_in_buffer
+
+/* Write the local copies back to datasrc --- do this only at a restart boundary */
+#define INPUT_SYNC(cinfo)  \
+	( datasrc->next_input_byte = next_input_byte,  \
+	  datasrc->bytes_in_buffer = bytes_in_buffer )
+
+/* Reload the local copies from datasrc --- used only in MAKE_BYTE_AVAIL */
+#define INPUT_RELOAD(cinfo)  \
+	( next_input_byte = datasrc->next_input_byte,  \
+	  bytes_in_buffer = datasrc->bytes_in_buffer )
+
+/* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available.
+ * INPUT_SYNC is deliberately *not* done before calling fill_input_buffer;
+ * after a successful fill the local copies are reloaded.  If the fill
+ * fails, "action" is executed.  Expands to a bare if-statement. */
+#define MAKE_BYTE_AVAIL(cinfo,action)  \
+	if (bytes_in_buffer == 0) {  \
+	  if (! (*datasrc->fill_input_buffer) (cinfo))  \
+	    { action; }  \
+	  INPUT_RELOAD(cinfo);  \
+	}
+
+/* Read a byte into variable V, advancing only the local copies.
+ * If must suspend, take the specified action (typically "return FALSE").
+ */
+#define INPUT_BYTE(cinfo,V,action)  \
+	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V = GETJOCTET(*next_input_byte++); )
+
+/* As above, but read two bytes as an unsigned 16-bit integer, high byte first.
+ * V should be declared unsigned int or perhaps INT32.
+ */
+#define INPUT_2BYTES(cinfo,V,action)  \
+	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \
+		  MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V += GETJOCTET(*next_input_byte++); )
+
+
+/*
+ * Routines to process JPEG markers.
+ *
+ * Entry condition: JPEG marker itself has been read and its code saved
+ *   in cinfo->unread_marker; input restart point is just after the marker.
+ *
+ * Exit: if return TRUE, have read and processed any parameters, and have
+ *   updated the restart point to point after the parameters.
+ *   If return FALSE, was forced to suspend before reaching end of
+ *   marker parameters; restart point has not been moved.  Same routine
+ *   will be called again after application supplies more input data.
+ *
+ * This approach to suspension assumes that all of a marker's parameters
+ * can fit into a single input bufferload.  This should hold for "normal"
+ * markers.  Some COM/APPn markers might have large parameter segments
+ * that might not fit.  If we are simply dropping such a marker, we use
+ * skip_input_data to get past it, and thereby put the problem on the
+ * source manager's shoulders.  If we are saving the marker's contents
+ * into memory, we use a slightly different convention: when forced to
+ * suspend, the marker processor updates the restart point to the end of
+ * what it's consumed (ie, the end of the buffer) before returning FALSE.
+ * On resumption, cinfo->unread_marker still contains the marker code,
+ * but the data source will point to the next chunk of marker data.
+ * The marker processor must retain internal state to deal with this.
+ *
+ * Note that we don't bother to avoid duplicate trace messages if a
+ * suspension occurs within marker parameters.  Other side effects
+ * require more care.
+ */
+
+
+LOCAL(boolean)
+get_soi (j_decompress_ptr cinfo)
+/* Process an SOI marker: reject a duplicate, then reset per-image defaults */
+{
+  int tbl = 0;
+
+  TRACEMS(cinfo, 1, JTRC_SOI);
+
+  if (cinfo->marker->saw_SOI)
+    ERREXIT(cinfo, JERR_SOI_DUPLICATE);
+
+  /* Defaults for everything that is defined to be reset by SOI */
+  cinfo->restart_interval = 0;
+  while (tbl < NUM_ARITH_TBLS) {
+    cinfo->arith_dc_L[tbl] = 0;
+    cinfo->arith_dc_U[tbl] = 1;
+    cinfo->arith_ac_K[tbl] = 5;
+    tbl++;
+  }
+
+  /* Initial assumptions for colorspace etc */
+  cinfo->jpeg_color_space = JCS_UNKNOWN;
+  cinfo->color_transform = JCT_NONE;
+  cinfo->CCIR601_sampling = FALSE; /* Assume non-CCIR sampling??? */
+
+  /* No JFIF APP0 seen yet: install the default JFIF APP0 values */
+  cinfo->saw_JFIF_marker = FALSE;
+  cinfo->JFIF_major_version = 1;
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;
+  cinfo->X_density = 1;
+  cinfo->Y_density = 1;
+  /* No Adobe APP14 seen yet */
+  cinfo->saw_Adobe_marker = FALSE;
+  cinfo->Adobe_transform = 0;
+
+  cinfo->marker->saw_SOI = TRUE;
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sof (j_decompress_ptr cinfo, boolean is_baseline, boolean is_prog,
+	 boolean is_arith)
+/* Process a SOFn marker: frame header followed by one entry per component */
+{
+  INT32 length;
+  int c, ci, i;
+  jpeg_component_info * compptr;
+  INPUT_VARS(cinfo);
+
+  cinfo->is_baseline = is_baseline;
+  cinfo->progressive_mode = is_prog;
+  cinfo->arith_code = is_arith;
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  INPUT_BYTE(cinfo, cinfo->data_precision, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_height, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_width, return FALSE);
+  INPUT_BYTE(cinfo, cinfo->num_components, return FALSE);
+
+  length -= 8;			/* the eight header bytes read so far */
+
+  TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker,
+	   (int) cinfo->image_width, (int) cinfo->image_height,
+	   cinfo->num_components);
+
+  if (cinfo->marker->saw_SOF)
+    ERREXIT(cinfo, JERR_SOF_DUPLICATE);
+
+  /* We don't support files in which the image height is initially specified */
+  /* as 0 and is later redefined by DNL.  As long as we have to check that,  */
+  /* might as well have a general sanity check. */
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0 ||
+      cinfo->num_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  if (length != (cinfo->num_components * 3)) /* three bytes per component follow */
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  if (cinfo->comp_info == NULL)	/* do only once, even if suspend */
+    cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small)
+			((j_common_ptr) cinfo, JPOOL_IMAGE,
+			 cinfo->num_components * SIZEOF(jpeg_component_info));
+
+  for (ci = 0; ci < cinfo->num_components; ci++) {
+    INPUT_BYTE(cinfo, c, return FALSE);
+    /* Check to see whether component id has already been seen   */
+    /* (in violation of the spec, but unfortunately seen in some */
+    /* files).  If so, create "fake" component id equal to the   */
+    /* max id seen so far + 1. */
+    for (i = 0, compptr = cinfo->comp_info; i < ci; i++, compptr++) {
+      if (c == compptr->component_id) {
+	compptr = cinfo->comp_info;
+	c = compptr->component_id;
+	compptr++;
+	for (i = 1; i < ci; i++, compptr++) {
+	  if (compptr->component_id > c) c = compptr->component_id;
+	}
+	c++;
+	break;
+      }
+    }
+    compptr->component_id = c;	/* compptr is comp_info + ci on either exit of the scan above */
+    compptr->component_index = ci;
+    INPUT_BYTE(cinfo, c, return FALSE);
+    compptr->h_samp_factor = (c >> 4) & 15;	/* high nibble */
+    compptr->v_samp_factor = (c     ) & 15;	/* low nibble */
+    INPUT_BYTE(cinfo, compptr->quant_tbl_no, return FALSE);
+
+    TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT,
+	     compptr->component_id, compptr->h_samp_factor,
+	     compptr->v_samp_factor, compptr->quant_tbl_no);
+  }
+
+  cinfo->marker->saw_SOF = TRUE;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sos (j_decompress_ptr cinfo)
+/* Process a SOS marker: component selectors, then Ss, Se and Ah/Al */
+{
+  INT32 length;
+  int c, ci, i, n;
+  jpeg_component_info * compptr;
+  INPUT_VARS(cinfo);
+
+  if (! cinfo->marker->saw_SOF)
+    ERREXITS(cinfo, JERR_SOF_BEFORE, "SOS");
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  INPUT_BYTE(cinfo, n, return FALSE); /* Number of components */
+
+  TRACEMS1(cinfo, 1, JTRC_SOS, n);
+
+  if (length != (n * 2 + 6) || n > MAX_COMPS_IN_SCAN ||
+      (n == 0 && !cinfo->progressive_mode))
+      /* pseudo SOS marker only allowed in progressive mode */
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  cinfo->comps_in_scan = n;
+
+  /* Collect the component-spec parameters (two bytes per component) */
+
+  for (i = 0; i < n; i++) {
+    INPUT_BYTE(cinfo, c, return FALSE);
+
+    /* Detect the case where component id's are not unique, and, if so, */
+    /* create a fake component id using the same logic as in get_sof.   */
+    /* Note:  This also ensures that all of the SOF components are      */
+    /* referenced in the single scan case, which prevents access to     */
+    /* uninitialized memory in later decoding stages. */
+    for (ci = 0; ci < i; ci++) {
+      if (c == cinfo->cur_comp_info[ci]->component_id) {
+	c = cinfo->cur_comp_info[0]->component_id;
+	for (ci = 1; ci < i; ci++) {
+	  compptr = cinfo->cur_comp_info[ci];
+	  if (compptr->component_id > c) c = compptr->component_id;
+	}
+	c++;
+	break;
+      }
+    }
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      if (c == compptr->component_id)
+	goto id_found;
+    }
+
+    ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, c);
+
+  id_found:
+
+    cinfo->cur_comp_info[i] = compptr;
+    INPUT_BYTE(cinfo, c, return FALSE);
+    compptr->dc_tbl_no = (c >> 4) & 15;	/* high nibble */
+    compptr->ac_tbl_no = (c     ) & 15;	/* low nibble */
+
+    TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, compptr->component_id,
+	     compptr->dc_tbl_no, compptr->ac_tbl_no);
+  }
+
+  /* Collect the additional scan parameters Ss, Se, Ah/Al. */
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ss = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Se = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ah = (c >> 4) & 15;
+  cinfo->Al = (c     ) & 15;
+
+  TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se,
+	   cinfo->Ah, cinfo->Al);
+
+  /* Prepare to scan data & restart markers */
+  cinfo->marker->next_restart_num = 0;
+
+  /* Count another (non-pseudo) SOS marker; n == 0 is the pseudo marker */
+  if (n) cinfo->input_scan_number++;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+#ifdef D_ARITH_CODING_SUPPORTED
+
+LOCAL(boolean)
+get_dac (j_decompress_ptr cinfo)
+/* Process a DAC marker: a list of (table id, value) byte pairs */
+{
+  INT32 remaining;
+  int tbl, spec;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, remaining, return FALSE);
+
+  /* The length word counts itself; each entry then uses two bytes. */
+  for (remaining -= 2; remaining > 0; ) {
+    INPUT_BYTE(cinfo, tbl, return FALSE);
+    INPUT_BYTE(cinfo, spec, return FALSE);
+    remaining -= 2;
+
+    TRACEMS2(cinfo, 1, JTRC_DAC, tbl, spec);
+
+    if (tbl < 0 || tbl >= (2*NUM_ARITH_TBLS))
+      ERREXIT1(cinfo, JERR_DAC_INDEX, tbl);
+
+    if (tbl < NUM_ARITH_TBLS) {	/* DC table: low nibble is L, high nibble is U */
+      cinfo->arith_dc_L[tbl] = (UINT8) (spec & 0x0F);
+      cinfo->arith_dc_U[tbl] = (UINT8) (spec >> 4);
+      if (cinfo->arith_dc_L[tbl] > cinfo->arith_dc_U[tbl])
+	ERREXIT1(cinfo, JERR_DAC_VALUE, spec);
+    } else {			/* AC table: the whole byte is K */
+      cinfo->arith_ac_K[tbl - NUM_ARITH_TBLS] = (UINT8) spec;
+    }
+  }
+
+  /* An odd payload size leaves the count below zero */
+  if (remaining != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+#else /* ! D_ARITH_CODING_SUPPORTED */
+
+#define get_dac(cinfo)  skip_variable(cinfo)	/* no arithmetic decoder built: skip the DAC segment */
+
+#endif /* D_ARITH_CODING_SUPPORTED */
+
+
+LOCAL(boolean)
+get_dht (j_decompress_ptr cinfo)
+/* Process a DHT marker: one or more Huffman table definitions */
+{
+  INT32 length;
+  UINT8 bits[17];
+  UINT8 huffval[256];
+  int i, index, count;
+  JHUFF_TBL **htblptr;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  while (length > 16) {
+    INPUT_BYTE(cinfo, index, return FALSE);
+    TRACEMS1(cinfo, 1, JTRC_DHT, index);
+
+    bits[0] = 0;
+    count = 0;
+    for (i = 1; i <= 16; i++) {
+      INPUT_BYTE(cinfo, bits[i], return FALSE);
+      count += bits[i];
+    }
+
+    length -= 1 + 16;
+
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+	     bits[1], bits[2], bits[3], bits[4],
+	     bits[5], bits[6], bits[7], bits[8]);
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+	     bits[9], bits[10], bits[11], bits[12],
+	     bits[13], bits[14], bits[15], bits[16]);
+
+    /* Here we just do minimal validation of the counts to avoid walking
+     * off the end of our table space.  jdhuff.c will check more carefully.
+     */
+    if (count > 256 || ((INT32) count) > length)
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+
+    MEMZERO(huffval, SIZEOF(huffval)); /* pre-zero array for later copy */
+
+    for (i = 0; i < count; i++)
+      INPUT_BYTE(cinfo, huffval[i], return FALSE);
+
+    length -= count;
+
+    /* Range-check the table number before it is used as an array index. */
+    if (index & 0x10) {		/* AC table definition */
+      index -= 0x10;
+      if (index < 0 || index >= NUM_HUFF_TBLS)
+	ERREXIT1(cinfo, JERR_DHT_INDEX, index);
+      htblptr = &cinfo->ac_huff_tbl_ptrs[index];
+    } else {			/* DC table definition */
+      if (index < 0 || index >= NUM_HUFF_TBLS)
+	ERREXIT1(cinfo, JERR_DHT_INDEX, index);
+      htblptr = &cinfo->dc_huff_tbl_ptrs[index];
+    }
+
+    if (*htblptr == NULL)
+      *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+    MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
+    MEMCOPY((*htblptr)->huffval, huffval, SIZEOF((*htblptr)->huffval));
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dqt (j_decompress_ptr cinfo)
+/* Process a DQT marker: one or more quantization table definitions */
+{
+  INT32 length, count, i;
+  int n, prec;
+  unsigned int tmp;
+  JQUANT_TBL *quant_ptr;
+  const int *natural_order;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  while (length > 0) {
+    length--;
+    INPUT_BYTE(cinfo, n, return FALSE);
+    prec = n >> 4;		/* nonzero selects two-byte entries below */
+    n &= 0x0F;			/* table number */
+
+    TRACEMS2(cinfo, 1, JTRC_DQT, n, prec);
+
+    if (n >= NUM_QUANT_TBLS)
+      ERREXIT1(cinfo, JERR_DQT_INDEX, n);
+      
+    if (cinfo->quant_tbl_ptrs[n] == NULL)
+      cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) cinfo);
+    quant_ptr = cinfo->quant_tbl_ptrs[n];
+
+    if (prec) {
+      if (length < DCTSIZE2 * 2) {
+	/* Short table: preset every entry to 1, then read what is there. */
+	for (i = 0; i < DCTSIZE2; i++) {
+	  quant_ptr->quantval[i] = 1;
+	}
+	count = length >> 1;
+      } else
+	count = DCTSIZE2;
+    } else {
+      if (length < DCTSIZE2) {
+	/* Short table: preset every entry to 1, then read what is there. */
+	for (i = 0; i < DCTSIZE2; i++) {
+	  quant_ptr->quantval[i] = 1;
+	}
+	count = length;
+      } else
+	count = DCTSIZE2;
+    }
+
+    switch (count) {		/* pick the index map for this entry count */
+    case (2*2): natural_order = jpeg_natural_order2; break;
+    case (3*3): natural_order = jpeg_natural_order3; break;
+    case (4*4): natural_order = jpeg_natural_order4; break;
+    case (5*5): natural_order = jpeg_natural_order5; break;
+    case (6*6): natural_order = jpeg_natural_order6; break;
+    case (7*7): natural_order = jpeg_natural_order7; break;
+    default:    natural_order = jpeg_natural_order;  break;
+    }
+
+    for (i = 0; i < count; i++) {
+      if (prec)
+	INPUT_2BYTES(cinfo, tmp, return FALSE);
+      else
+	INPUT_BYTE(cinfo, tmp, return FALSE);
+      /* We convert the zigzag-order table to natural array order. */
+      quant_ptr->quantval[natural_order[i]] = (UINT16) tmp;
+    }
+
+    if (cinfo->err->trace_level >= 2) {
+      for (i = 0; i < DCTSIZE2; i += 8) {
+	TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
+		 quant_ptr->quantval[i],   quant_ptr->quantval[i+1],
+		 quant_ptr->quantval[i+2], quant_ptr->quantval[i+3],
+		 quant_ptr->quantval[i+4], quant_ptr->quantval[i+5],
+		 quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]);
+      }
+    }
+
+    length -= count;
+    if (prec) length -= count;	/* two-byte entries used twice as many bytes */
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dri (j_decompress_ptr cinfo)
+/* Process a DRI marker: store the 16-bit restart interval */
+{
+  INT32 seg_len;
+  unsigned int interval;
+  INPUT_VARS(cinfo);
+
+  /* Segment is fixed-size: 2-byte length word plus 2-byte interval */
+  INPUT_2BYTES(cinfo, seg_len, return FALSE);
+  if (seg_len != 4)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_2BYTES(cinfo, interval, return FALSE);
+  TRACEMS1(cinfo, 1, JTRC_DRI, interval);
+
+  /* Stored only after every read that could suspend has succeeded */
+  cinfo->restart_interval = interval;
+  INPUT_SYNC(cinfo);
+
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_lse (j_decompress_ptr cinfo)
+/* Process an LSE marker; only the one transform checked for below is accepted */
+{
+  INT32 length;
+  unsigned int tmp;
+  int cid;
+  INPUT_VARS(cinfo);
+
+  if (! cinfo->marker->saw_SOF)
+    ERREXITS(cinfo, JERR_SOF_BEFORE, "LSE");
+
+  if (cinfo->num_components < 3) goto bad;	/* comp_info[0..2] are compared below */
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  if (length != 24)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 0x0D)	/* ID inverse transform specification */
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != MAXJSAMPLE) goto bad;		/* MAXTRANS */
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 3) goto bad;			/* Nt=3 */
+  INPUT_BYTE(cinfo, cid, return FALSE);
+  if (cid != cinfo->comp_info[1].component_id) goto bad;
+  INPUT_BYTE(cinfo, cid, return FALSE);
+  if (cid != cinfo->comp_info[0].component_id) goto bad;
+  INPUT_BYTE(cinfo, cid, return FALSE);
+  if (cid != cinfo->comp_info[2].component_id) goto bad;
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 0x80) goto bad;		/* F1: CENTER1=1, NORM1=0 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;			/* A(1,1)=0 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;			/* A(1,2)=0 */
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;		/* F2: CENTER2=0, NORM2=0 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 1) goto bad;			/* A(2,1)=1 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;			/* A(2,2)=0 */
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;		/* F3: CENTER3=0, NORM3=0 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 1) goto bad;			/* A(3,1)=1 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 0) {				/* A(3,2)=0 */
+    bad:					/* shared exit for every unsupported value */
+    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+  }
+
+  /* OK, valid transform that we can handle. */
+  cinfo->color_transform = JCT_SUBTRACT_GREEN;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Routines for processing APPn and COM markers.
+ * These are either saved in memory or discarded, per application request.
+ * APP0 and APP14 are specially checked to see if they are
+ * JFIF and Adobe markers, respectively.
+ */
+
+#define APP0_DATA_LEN	14	/* Bytes of a JFIF APP0 that examine_app0 reads (data[0..13]) */
+#define APP14_DATA_LEN	12	/* Bytes of an Adobe APP14 that examine_app14 reads (data[0..11]) */
+#define APPN_DATA_LEN	14	/* Buffer size in get_interesting_appn; must be the largest of the above!! */
+
+
+LOCAL(void)
+examine_app0 (j_decompress_ptr cinfo, JOCTET FAR * data,
+	      unsigned int datalen, INT32 remaining)
+/* Inspect the leading bytes of an APP0 segment.
+ * "JFIF\0" fills in the JFIF fields of cinfo; "JFXX\0" and anything else
+ * only produce trace output.
+ * datalen bytes are present at data[]; remaining is the unread rest.
+ */
+{
+  INT32 seg_len = (INT32) datalen + remaining;
+  int thumb_x, thumb_y;
+  /* Shared part of both signatures: 'J' 'F' . . '\0' */
+  boolean jf_frame = (datalen >= 6 &&
+		      GETJOCTET(data[0]) == 0x4A &&
+		      GETJOCTET(data[1]) == 0x46 &&
+		      GETJOCTET(data[4]) == 0);
+
+  if (jf_frame && datalen >= APP0_DATA_LEN &&
+      GETJOCTET(data[2]) == 0x49 && GETJOCTET(data[3]) == 0x46) {
+    /* Found JFIF APP0 marker: save info */
+    cinfo->saw_JFIF_marker = TRUE;
+    cinfo->JFIF_major_version = GETJOCTET(data[5]);
+    cinfo->JFIF_minor_version = GETJOCTET(data[6]);
+    cinfo->density_unit = GETJOCTET(data[7]);
+    cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
+    cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
+    thumb_x = GETJOCTET(data[12]);
+    thumb_y = GETJOCTET(data[13]);
+
+    /* Check version.
+     * Major version must be 1 or 2, anything else signals an incompatible
+     * change.
+     * (It used to be treated as an error, but is now a nonfatal warning.)
+     * Minor version should be 0..2, but process anyway if newer.
+     */
+    if (cinfo->JFIF_major_version != 1 && cinfo->JFIF_major_version != 2)
+      WARNMS2(cinfo, JWRN_JFIF_MAJOR,
+	      cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
+    /* Generate trace messages */
+    TRACEMS5(cinfo, 1, JTRC_JFIF,
+	     cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
+	     cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
+    /* Report the thumbnail only when at least one dimension is nonzero */
+    if (thumb_x | thumb_y)
+      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, thumb_x, thumb_y);
+    /* Whatever follows the fixed header should be three bytes per pixel */
+    seg_len -= APP0_DATA_LEN;
+    if (seg_len != (INT32) thumb_x * (INT32) thumb_y * (INT32) 3)
+      TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) seg_len);
+  } else if (jf_frame &&
+	     GETJOCTET(data[2]) == 0x58 && GETJOCTET(data[3]) == 0x58) {
+    /* Found JFIF "JFXX" extension APP0 marker */
+    /* The library doesn't actually do anything with these,
+     * but we try to produce a helpful trace message.
+     */
+    switch (GETJOCTET(data[5])) {
+    case 0x10:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int) seg_len);
+      break;
+    case 0x11:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_PALETTE, (int) seg_len);
+      break;
+    case 0x13:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int) seg_len);
+      break;
+    default:
+      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
+	       GETJOCTET(data[5]), (int) seg_len);
+      break;
+    }
+  } else {
+    /* Start of APP0 does not match "JFIF" or "JFXX", or too short */
+    TRACEMS1(cinfo, 1, JTRC_APP0, (int) seg_len);
+  }
+}
+
+
+LOCAL(void)
+examine_app14 (j_decompress_ptr cinfo, JOCTET FAR * data,
+	       unsigned int datalen, INT32 remaining)
+/* Inspect the leading bytes of an APP14 segment.
+ * An "Adobe" header records the transform code in cinfo; anything else is
+ * only traced.  datalen bytes are at data[]; remaining is the unread rest.
+ */
+{
+  unsigned int ver, fl0, fl1, xform;
+
+  if (datalen < APP14_DATA_LEN ||
+      GETJOCTET(data[0]) != 0x41 ||
+      GETJOCTET(data[1]) != 0x64 ||
+      GETJOCTET(data[2]) != 0x6F ||
+      GETJOCTET(data[3]) != 0x62 ||
+      GETJOCTET(data[4]) != 0x65) {
+    /* Start of APP14 does not match "Adobe", or too short */
+    TRACEMS1(cinfo, 1, JTRC_APP14, (int) (datalen + remaining));
+    return;
+  }
+  /* Found Adobe APP14 marker: three big-endian words, then one byte */
+  ver = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]);
+  fl0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]);
+  fl1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]);
+  xform = GETJOCTET(data[11]);
+  TRACEMS4(cinfo, 1, JTRC_ADOBE, ver, fl0, fl1, xform);
+  cinfo->saw_Adobe_marker = TRUE;
+  cinfo->Adobe_transform = (UINT8) xform;
+}
+
+
+METHODDEF(boolean)
+get_interesting_appn (j_decompress_ptr cinfo)
+/* Process an APP0 or APP14 marker without saving it.
+ * Only the first APPN_DATA_LEN bytes are examined; the rest is skipped.
+ */
+{
+  INT32 length;
+  JOCTET head[APPN_DATA_LEN];
+  unsigned int k, want;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  /* Fetch at most APPN_DATA_LEN leading bytes; a length word of 2 or less
+   * leaves length <= 0 here and fetches nothing.
+   */
+  if (length <= 0)
+    want = 0;
+  else
+    want = (length < APPN_DATA_LEN) ? (unsigned int) length : APPN_DATA_LEN;
+  for (k = 0; k < want; k++)
+    INPUT_BYTE(cinfo, head[k], return FALSE);
+  length -= want;
+
+  /* Hand the prefix to the matching examiner */
+  if (cinfo->unread_marker == M_APP0) {
+    examine_app0(cinfo, (JOCTET FAR *) head, want, length);
+  } else if (cinfo->unread_marker == M_APP14) {
+    examine_app14(cinfo, (JOCTET FAR *) head, want, length);
+  } else {
+    /* can't get here unless jpeg_save_markers chooses wrong processor */
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+  }
+
+  /* skip any remaining data -- could be lots.
+   * The restart point is synced first, before skip_input_data is called.
+   */
+  INPUT_SYNC(cinfo);
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+
+  return TRUE;
+}
+
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+METHODDEF(boolean)
+save_marker (j_decompress_ptr cinfo)
+/* Save an APPn or COM marker into the marker list; resumable after a suspension */
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  jpeg_saved_marker_ptr cur_marker = marker->cur_marker;
+  unsigned int bytes_read, data_length;
+  JOCTET FAR * data;
+  INT32 length = 0;		/* bytes left to skip after the saved part */
+  INPUT_VARS(cinfo);
+
+  if (cur_marker == NULL) {
+    /* begin reading a marker */
+    INPUT_2BYTES(cinfo, length, return FALSE);
+    length -= 2;
+    if (length >= 0) {		/* watch out for bogus length word */
+      /* figure out how much we want to save: the lesser of limit and length */
+      unsigned int limit;
+      if (cinfo->unread_marker == (int) M_COM)
+	limit = marker->length_limit_COM;
+      else
+	limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0];
+      if ((unsigned int) length < limit)
+	limit = (unsigned int) length;
+      /* allocate and initialize the marker item */
+      cur_marker = (jpeg_saved_marker_ptr)
+	(*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				    SIZEOF(struct jpeg_marker_struct) + limit);
+      cur_marker->next = NULL;
+      cur_marker->marker = (UINT8) cinfo->unread_marker;
+      cur_marker->original_length = (unsigned int) length;
+      cur_marker->data_length = limit;
+      /* data area is just beyond the jpeg_marker_struct */
+      data = cur_marker->data = (JOCTET FAR *) (cur_marker + 1);
+      marker->cur_marker = cur_marker;
+      marker->bytes_read = 0;
+      bytes_read = 0;
+      data_length = limit;
+    } else {
+      /* deal with bogus length word: nothing is saved, cur_marker stays NULL */
+      bytes_read = data_length = 0;
+      data = NULL;
+    }
+  } else {
+    /* resume reading a marker from the state stored before suspending */
+    bytes_read = marker->bytes_read;
+    data_length = cur_marker->data_length;
+    data = cur_marker->data + bytes_read;
+  }
+
+  while (bytes_read < data_length) {
+    INPUT_SYNC(cinfo);		/* move the restart point to here */
+    marker->bytes_read = bytes_read;
+    /* If there's not at least one byte in buffer, suspend */
+    MAKE_BYTE_AVAIL(cinfo, return FALSE);
+    /* Copy bytes with reasonable rapidity */
+    while (bytes_read < data_length && bytes_in_buffer > 0) {
+      *data++ = *next_input_byte++;
+      bytes_in_buffer--;
+      bytes_read++;
+    }
+  }
+
+  /* Done reading what we want to read */
+  if (cur_marker != NULL) {	/* will be NULL if bogus length word */
+    /* Add new marker to end of list */
+    if (cinfo->marker_list == NULL) {
+      cinfo->marker_list = cur_marker;
+    } else {
+      jpeg_saved_marker_ptr prev = cinfo->marker_list;
+      while (prev->next != NULL)
+	prev = prev->next;
+      prev->next = cur_marker;
+    }
+    /* Reset pointer & calc remaining data length */
+    data = cur_marker->data;
+    length = cur_marker->original_length - data_length;
+  }
+  /* Reset to initial state for next marker */
+  marker->cur_marker = NULL;
+
+  /* Process the marker if interesting; else just make a generic trace msg */
+  switch (cinfo->unread_marker) {
+  case M_APP0:
+    examine_app0(cinfo, data, data_length, length);
+    break;
+  case M_APP14:
+    examine_app14(cinfo, data, data_length, length);
+    break;
+  default:
+    TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker,
+	     (int) (data_length + length));
+    break;
+  }
+
+  /* skip any remaining data -- could be lots */
+  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+
+  return TRUE;
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+METHODDEF(boolean)
+skip_variable (j_decompress_ptr cinfo)
+/* Skip over the payload of an unknown or uninteresting variable-length marker */
+{
+  INT32 payload;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, payload, return FALSE);
+  payload -= 2;			/* the length word counts itself */
+  TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) payload);
+
+  /* Sync the restart point first; do this before skip_input_data */
+  INPUT_SYNC(cinfo);
+  if (payload <= 0)
+    return TRUE;
+  (*cinfo->src->skip_input_data) (cinfo, (long) payload);
+  return TRUE;
+}
+
+
+/*
+ * Find the next JPEG marker, save it in cinfo->unread_marker.
+ * Returns FALSE if had to suspend before reaching a marker;
+ * in that case cinfo->unread_marker is unchanged.
+ *
+ * Note that the result might not be a valid marker code,
+ * but it will never be 0 or FF.
+ */
+
+LOCAL(boolean)
+next_marker (j_decompress_ptr cinfo)
+{
+  int c;			/* most recent input byte */
+  INPUT_VARS(cinfo);
+
+  for (;;) {
+    INPUT_BYTE(cinfo, c, return FALSE);
+    /* Skip any non-FF bytes.
+     * This may look a bit inefficient, but it will not occur in a valid file.
+     * We sync after each discarded byte so that a suspending data source
+     * can discard the byte from its buffer.
+     */
+    while (c != 0xFF) {
+      cinfo->marker->discarded_bytes++;
+      INPUT_SYNC(cinfo);
+      INPUT_BYTE(cinfo, c, return FALSE);
+    }
+    /* This loop swallows any duplicate FF bytes.  Extra FFs are legal as
+     * pad bytes, so don't count them in discarded_bytes.  We assume there
+     * will not be so many consecutive FF bytes as to overflow a suspending
+     * data source's input buffer.
+     */
+    do {
+      INPUT_BYTE(cinfo, c, return FALSE);
+    } while (c == 0xFF);
+    if (c != 0)
+      break;			/* found a valid marker, exit loop */
+    /* Reach here if we found a stuffed-zero data sequence (FF/00).
+     * Discard it and loop back to try again.
+     */
+    cinfo->marker->discarded_bytes += 2;	/* the FF and the 00 */
+    INPUT_SYNC(cinfo);
+  }
+
+  if (cinfo->marker->discarded_bytes != 0) {	/* report skipped bytes once, then reset */
+    WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
+    cinfo->marker->discarded_bytes = 0;
+  }
+
+  cinfo->unread_marker = c;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+first_marker (j_decompress_ptr cinfo)
+/* Obtain the initial SOI marker; the counterpart of next_marker for the
+ * very start of the datastream.  No leading garbage or fill bytes are
+ * tolerated here, otherwise an entire non-JPEG file might be scanned
+ * before giving up.  An application that wants to process non-JFIF files
+ * must seek to the SOI itself before calling the JPEG library.
+ */
+{
+  int lead, code;
+  INPUT_VARS(cinfo);
+
+  INPUT_BYTE(cinfo, lead, return FALSE);
+  INPUT_BYTE(cinfo, code, return FALSE);
+  /* The first two bytes must be exactly FF followed by the SOI code */
+  if (! (lead == 0xFF && code == (int) M_SOI))
+    ERREXIT2(cinfo, JERR_NO_SOI, lead, code);
+
+  cinfo->unread_marker = code;
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Read markers until SOS or EOI.
+ *
+ * Returns same codes as are defined for jpeg_consume_input:
+ * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ *
+ * Note: This function may return a pseudo SOS marker (with zero
+ * component number) for treatment by the input controller's consume_input.
+ * consume_input itself should filter out (skip) the pseudo marker
+ * after processing for the caller.
+ */
+
+METHODDEF(int)
+read_markers (j_decompress_ptr cinfo)
+{
+  /* Outer loop repeats once for each marker. */
+  for (;;) {
+    /* Collect the marker proper, unless we already did. */
+    /* NB: first_marker() enforces the requirement that SOI appear first. */
+    if (cinfo->unread_marker == 0) {
+      if (! cinfo->marker->saw_SOI) {	/* no SOI yet: insist on FF/SOI */
+	if (! first_marker(cinfo))
+	  return JPEG_SUSPENDED;
+      } else {
+	if (! next_marker(cinfo))
+	  return JPEG_SUSPENDED;
+      }
+    }
+    /* At this point cinfo->unread_marker contains the marker code and the
+     * input point is just past the marker proper, but before any parameters.
+     * A suspension will cause us to return with this state still true.
+     */
+    switch (cinfo->unread_marker) {
+    case M_SOI:
+      if (! get_soi(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    /* NOTE(review): the three get_sof flags below appear to be
+     * (baseline, progressive, arithmetic) - confirm against get_sof. */
+    case M_SOF0:		/* Baseline */
+      if (! get_sof(cinfo, TRUE, FALSE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF1:		/* Extended sequential, Huffman */
+      if (! get_sof(cinfo, FALSE, FALSE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF2:		/* Progressive, Huffman */
+      if (! get_sof(cinfo, FALSE, TRUE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF9:		/* Extended sequential, arithmetic */
+      if (! get_sof(cinfo, FALSE, FALSE, TRUE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF10:		/* Progressive, arithmetic */
+      if (! get_sof(cinfo, FALSE, TRUE, TRUE))
+	return JPEG_SUSPENDED;
+      break;
+
+    /* Currently unsupported SOFn types: rejected with a fatal error */
+    case M_SOF3:		/* Lossless, Huffman */
+    case M_SOF5:		/* Differential sequential, Huffman */
+    case M_SOF6:		/* Differential progressive, Huffman */
+    case M_SOF7:		/* Differential lossless, Huffman */
+    case M_JPG:			/* Reserved for JPEG extensions */
+    case M_SOF11:		/* Lossless, arithmetic */
+    case M_SOF13:		/* Differential sequential, arithmetic */
+    case M_SOF14:		/* Differential progressive, arithmetic */
+    case M_SOF15:		/* Differential lossless, arithmetic */
+      ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker);
+      break;
+
+    case M_SOS:			/* scan header: stop and report to caller */
+      if (! get_sos(cinfo))
+	return JPEG_SUSPENDED;
+      cinfo->unread_marker = 0;	/* processed the marker */
+      return JPEG_REACHED_SOS;
+
+    case M_EOI:			/* end of image: stop and report to caller */
+      TRACEMS(cinfo, 1, JTRC_EOI);
+      cinfo->unread_marker = 0;	/* processed the marker */
+      return JPEG_REACHED_EOI;
+
+    case M_DAC:
+      if (! get_dac(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_DHT:
+      if (! get_dht(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_DQT:
+      if (! get_dqt(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_DRI:
+      if (! get_dri(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_JPG8:		/* handed to get_lse */
+      if (! get_lse(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_APP0:
+    case M_APP1:
+    case M_APP2:
+    case M_APP3:
+    case M_APP4:
+    case M_APP5:
+    case M_APP6:
+    case M_APP7:
+    case M_APP8:
+    case M_APP9:
+    case M_APP10:
+    case M_APP11:
+    case M_APP12:
+    case M_APP13:
+    case M_APP14:
+    case M_APP15:		/* APPn: dispatch via the per-marker handler table */
+      if (! (*((my_marker_ptr) cinfo->marker)->process_APPn[
+		cinfo->unread_marker - (int) M_APP0]) (cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_COM:			/* COM: dispatch via the installed COM handler */
+      if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_RST0:		/* these are all parameterless */
+    case M_RST1:
+    case M_RST2:
+    case M_RST3:
+    case M_RST4:
+    case M_RST5:
+    case M_RST6:
+    case M_RST7:
+    case M_TEM:
+      TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker);
+      break;
+
+    case M_DNL:			/* Ignore DNL ... perhaps the wrong thing */
+      if (! skip_variable(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    default:			/* must be DHP, EXP, JPGn, or RESn */
+      /* For now, we treat the reserved markers as fatal errors since they are
+       * likely to be used to signal incompatible JPEG Part 3 extensions.
+       * Once the JPEG 3 version-number marker is well defined, this code
+       * ought to change!
+       */
+      ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+      break;
+    }
+    /* Successfully processed marker, so reset state variable */
+    cinfo->unread_marker = 0;
+  } /* end loop */
+}
+
+
+/*
+ * Read a restart marker, which is expected to appear next in the datastream;
+ * if the marker is not there, take appropriate recovery action.
+ * Returns FALSE if suspension is required.
+ *
+ * This is called by the entropy decoder after it has read an appropriate
+ * number of MCUs.  cinfo->unread_marker may be nonzero if the entropy decoder
+ * has already read a marker from the data source.  Under normal conditions
+ * cinfo->unread_marker will be reset to 0 before returning; if not reset,
+ * it holds a marker which the decoder will be unable to read past.
+ */
+
+METHODDEF(boolean)
+read_restart_marker (j_decompress_ptr cinfo)
+{
+  /* Obtain a marker unless we already did. */
+  /* Note that next_marker will complain if it skips any data. */
+  if (cinfo->unread_marker == 0) {
+    if (! next_marker(cinfo))
+      return FALSE;		/* suspended; next_restart_num is unchanged */
+  }
+
+  if (cinfo->unread_marker ==
+      ((int) M_RST0 + cinfo->marker->next_restart_num)) {
+    /* Normal case --- swallow the marker and let entropy decoder continue */
+    TRACEMS1(cinfo, 3, JTRC_RST, cinfo->marker->next_restart_num);
+    cinfo->unread_marker = 0;
+  } else {
+    /* Uh-oh, the restart markers have been messed up. */
+    /* Let the data source manager determine how to resync. */
+    if (! (*cinfo->src->resync_to_restart) (cinfo,
+					    cinfo->marker->next_restart_num))
+      return FALSE;		/* suspended; next_restart_num is unchanged */
+  }
+
+  /* Advance the expected restart number, wrapping from 7 back to 0 */
+  cinfo->marker->next_restart_num = (cinfo->marker->next_restart_num + 1) & 7;
+
+  return TRUE;
+}
+
+
+/*
+ * This is the default resync_to_restart method for data source managers
+ * to use if they don't have any better approach.  Some data source managers
+ * may be able to back up, or may have additional knowledge about the data
+ * which permits a more intelligent recovery strategy; such managers would
+ * presumably supply their own resync method.
+ *
+ * read_restart_marker calls resync_to_restart if it finds a marker other than
+ * the restart marker it was expecting.  (This code is *not* used unless
+ * a nonzero restart interval has been declared.)  cinfo->unread_marker is
+ * the marker code actually found (might be anything, except 0 or FF).
+ * The desired restart marker number (0..7) is passed as a parameter.
+ * This routine is supposed to apply whatever error recovery strategy seems
+ * appropriate in order to position the input stream to the next data segment.
+ * Note that cinfo->unread_marker is treated as a marker appearing before
+ * the current data-source input point; usually it should be reset to zero
+ * before returning.
+ * Returns FALSE if suspension is required.
+ *
+ * This implementation is substantially constrained by wanting to treat the
+ * input as a data stream; this means we can't back up.  Therefore, we have
+ * only the following actions to work with:
+ *   1. Simply discard the marker and let the entropy decoder resume at next
+ *      byte of file.
+ *   2. Read forward until we find another marker, discarding intervening
+ *      data.  (In theory we could look ahead within the current bufferload,
+ *      without having to discard data if we don't find the desired marker.
+ *      This idea is not implemented here, in part because it makes behavior
+ *      dependent on buffer size and chance buffer-boundary positions.)
+ *   3. Leave the marker unread (by failing to zero cinfo->unread_marker).
+ *      This will cause the entropy decoder to process an empty data segment,
+ *      inserting dummy zeroes, and then we will reprocess the marker.
+ *
+ * #2 is appropriate if we think the desired marker lies ahead, while #3 is
+ * appropriate if the found marker is a future restart marker (indicating
+ * that we have missed the desired restart marker, probably because it got
+ * corrupted).
+ * We apply #2 or #3 if the found marker is a restart marker no more than
+ * two counts behind or ahead of the expected one.  We also apply #2 if the
+ * found marker is not a legal JPEG marker code (it's certainly bogus data).
+ * If the found marker is a restart marker more than 2 counts away, we do #1
+ * (too much risk that the marker is erroneous; with luck we will be able to
+ * resync at some future point).
+ * For any valid non-restart JPEG marker, we apply #3.  This keeps us from
+ * overrunning the end of a scan.  An implementation limited to single-scan
+ * files might find it better to apply #2 for markers other than EOI, since
+ * any other marker would have to be bogus data in that case.
+ */
+
+GLOBAL(boolean)
+jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired)
+{
+  int marker = cinfo->unread_marker;	/* marker code actually found */
+  int action = 1;			/* recovery strategy chosen: 1, 2 or 3 */
+  
+  /* Always put up a warning. */
+  WARNMS2(cinfo, JWRN_MUST_RESYNC, marker, desired);
+  
+  /* Outer loop handles repeated decision after scanning forward. */
+  for (;;) {
+    if (marker < (int) M_SOF0)
+      action = 2;		/* invalid marker */
+    else if (marker < (int) M_RST0 || marker > (int) M_RST7)
+      action = 3;		/* valid non-restart marker */
+    else {
+      if (marker == ((int) M_RST0 + ((desired+1) & 7)) ||
+	  marker == ((int) M_RST0 + ((desired+2) & 7)))
+	action = 3;		/* one of the next two expected restarts */
+      else if (marker == ((int) M_RST0 + ((desired-1) & 7)) ||
+	       marker == ((int) M_RST0 + ((desired-2) & 7)))
+	action = 2;		/* a prior restart, so advance */
+      else
+	action = 1;		/* desired restart or too far away */
+    }
+    TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, marker, action);
+    switch (action) {
+    case 1:
+      /* Discard marker and let entropy decoder resume processing. */
+      cinfo->unread_marker = 0;
+      return TRUE;
+    case 2:
+      /* Scan to the next marker, and repeat the decision loop. */
+      if (! next_marker(cinfo))
+	return FALSE;		/* suspension: caller must retry later */
+      marker = cinfo->unread_marker;
+      break;
+    case 3:
+      /* Return without advancing past this marker (unread_marker stays set). */
+      /* Entropy decoder will be forced to process an empty segment. */
+      return TRUE;
+    }
+  } /* end loop */
+}
+
+
+/*
+ * Reset marker processing state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_marker_reader (j_decompress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+
+  cinfo->comp_info = NULL;		/* until allocated by get_sof */
+  cinfo->input_scan_number = 0;		/* no SOS seen yet */
+  cinfo->unread_marker = 0;		/* no pending marker */
+  marker->pub.saw_SOI = FALSE;		/* set internal state too */
+  marker->pub.saw_SOF = FALSE;		/* no frame header seen yet */
+  marker->pub.discarded_bytes = 0;	/* nothing skipped by next_marker yet */
+  marker->cur_marker = NULL;		/* presumably: no marker save in progress */
+}
+
+
+/*
+ * Initialize the marker reader module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_marker_reader (j_decompress_ptr cinfo)
+{
+  my_marker_ptr marker;
+  int i;			/* index over the 16 APPn slots */
+
+  /* Create subobject in permanent pool */
+  marker = (my_marker_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				SIZEOF(my_marker_reader));
+  cinfo->marker = &marker->pub;
+  /* Initialize public method pointers */
+  marker->pub.reset_marker_reader = reset_marker_reader;
+  marker->pub.read_markers = read_markers;
+  marker->pub.read_restart_marker = read_restart_marker;
+  /* Initialize COM/APPn processing.
+   * By default, we examine and then discard APP0 and APP14,
+   * but simply discard COM and all other APPn.
+   */
+  marker->process_COM = skip_variable;
+  marker->length_limit_COM = 0;
+  for (i = 0; i < 16; i++) {
+    marker->process_APPn[i] = skip_variable;
+    marker->length_limit_APPn[i] = 0;
+  }
+  marker->process_APPn[0] = get_interesting_appn;	/* override for APP0 */
+  marker->process_APPn[14] = get_interesting_appn;	/* override for APP14 */
+  /* Reset marker processing state */
+  reset_marker_reader(cinfo);
+}
+
+
+/*
+ * Control saving of COM and APPn markers into marker_list.
+ */
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+GLOBAL(void)
+jpeg_save_markers (j_decompress_ptr cinfo, int marker_code,
+		   unsigned int length_limit)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  long maxlength;		/* largest length_limit we can honor */
+  jpeg_marker_parser_method processor;	/* handler to install for marker_code */
+
+  /* Length limit mustn't be larger than what we can allocate
+   * (should only be a concern in a 16-bit environment).
+   */
+  maxlength = cinfo->mem->max_alloc_chunk - SIZEOF(struct jpeg_marker_struct);
+  if (((long) length_limit) > maxlength)
+    length_limit = (unsigned int) maxlength;
+
+  /* Choose processor routine to use: a nonzero limit selects saving,
+   * a zero limit selects discarding.  APP0/APP14 have special requirements.
+   */
+  if (length_limit) {
+    processor = save_marker;
+    /* If saving APP0/APP14, save at least enough for our internal use. */
+    if (marker_code == (int) M_APP0 && length_limit < APP0_DATA_LEN)
+      length_limit = APP0_DATA_LEN;
+    else if (marker_code == (int) M_APP14 && length_limit < APP14_DATA_LEN)
+      length_limit = APP14_DATA_LEN;
+  } else {
+    processor = skip_variable;
+    /* If discarding APP0/APP14, use our regular on-the-fly processor. */
+    if (marker_code == (int) M_APP0 || marker_code == (int) M_APP14)
+      processor = get_interesting_appn;
+  }
+
+  if (marker_code == (int) M_COM) {	/* install for COM ... */
+    marker->process_COM = processor;
+    marker->length_limit_COM = length_limit;
+  } else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15) {
+    marker->process_APPn[marker_code - (int) M_APP0] = processor;
+    marker->length_limit_APPn[marker_code - (int) M_APP0] = length_limit;
+  } else				/* ... or APPn; any other code is an error */
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+/*
+ * Install a special processing method for COM or APPn markers.
+ */
+
+GLOBAL(void)
+jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code,
+			   jpeg_marker_parser_method routine)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+
+  if (marker_code == (int) M_COM)	/* replace the COM handler */
+    marker->process_COM = routine;
+  else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15)
+    marker->process_APPn[marker_code - (int) M_APP0] = routine;
+  else					/* only COM and APP0..APP15 are accepted */
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+}
diff --git a/libraries/jpeg/jdmaster.c b/libraries/jpeg/jdmaster.c
new file mode 100644
index 000000000..62c07671f
--- /dev/null
+++ b/libraries/jpeg/jdmaster.c
@@ -0,0 +1,538 @@
+/*
+ * jdmaster.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2002-2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains master control logic for the JPEG decompressor.
+ * These routines are concerned with selecting the modules to be executed
+ * and with determining the number of passes and the work to be done in each
+ * pass.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_decomp_master pub; /* public fields */
+
+  int pass_number;		/* # of passes completed */
+
+  boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */
+
+  /* Saved references to initialized quantizer modules,
+   * in case we need to switch modes.  NULL when not initialized.
+   */
+  struct jpeg_color_quantizer * quantizer_1pass;
+  struct jpeg_color_quantizer * quantizer_2pass;
+} my_decomp_master;
+
+typedef my_decomp_master * my_master_ptr;	/* obtained by casting cinfo->master */
+
+
+/*
+ * Determine whether merged upsample/color conversion should be used.
+ * CRUCIAL: this must match the actual capabilities of jdmerge.c!
+ */
+
+LOCAL(boolean)
+use_merged_upsample (j_decompress_ptr cinfo)
+{
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  /* Merging is the equivalent of plain box-filter upsampling. */
+  /* The following condition is only needed if fancy shall select
+   * a different upsampling method.  In our current implementation
+   * fancy only affects the DCT scaling, thus we can use fancy
+   * upsampling and merged upsample simultaneously, in particular
+   * with scaled DCT sizes larger than the default DCTSIZE.
+   */
+#if 0
+  if (cinfo->do_fancy_upsampling)
+    return FALSE;
+#endif
+  if (cinfo->CCIR601_sampling)
+    return FALSE;
+  /* jdmerge.c only supports 3-component YCC=>RGB with no color_transform */
+  if ((cinfo->jpeg_color_space != JCS_YCbCr &&
+       cinfo->jpeg_color_space != JCS_BG_YCC) ||
+      cinfo->num_components != 3 ||
+      cinfo->out_color_space != JCS_RGB ||
+      cinfo->out_color_components != RGB_PIXELSIZE ||
+      cinfo->color_transform)
+    return FALSE;
+  /* and it only handles 2h1v or 2h2v sampling ratios */
+  if (cinfo->comp_info[0].h_samp_factor != 2 ||
+      cinfo->comp_info[1].h_samp_factor != 1 ||
+      cinfo->comp_info[2].h_samp_factor != 1 ||
+      cinfo->comp_info[0].v_samp_factor >  2 ||	/* i.e. 1 (2h1v) or 2 (2h2v) */
+      cinfo->comp_info[1].v_samp_factor != 1 ||
+      cinfo->comp_info[2].v_samp_factor != 1)
+    return FALSE;
+  /* furthermore, it doesn't work if we've scaled the IDCTs differently */
+  if (cinfo->comp_info[0].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
+      cinfo->comp_info[1].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
+      cinfo->comp_info[2].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
+      cinfo->comp_info[0].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
+      cinfo->comp_info[1].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
+      cinfo->comp_info[2].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size)
+    return FALSE;
+  /* ??? also need to test for upsample-time rescaling, when & if supported */
+  return TRUE;			/* by golly, it'll work... */
+#else
+  return FALSE;			/* merged upsampling not compiled in */
+#endif
+}
+
+
+/*
+ * Compute output image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ * Also note that it may be called before the master module is initialized!
+ */
+
+GLOBAL(void)
+jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
+/* Do computations that are needed before master selection phase.
+ * This function is used for full decompression.
+ */
+{
+#ifdef IDCT_SCALING_SUPPORTED
+  int ci;			/* component index */
+  jpeg_component_info *compptr;	/* walks cinfo->comp_info in step with ci */
+#endif
+
+  /* Prevent application from calling me at wrong times */
+  if (cinfo->global_state != DSTATE_READY)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Compute core output image dimensions and DCT scaling choices. */
+  jpeg_core_output_dimensions(cinfo);
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+  /* In selecting the actual DCT scaling for each component, we try to
+   * scale up the chroma components via IDCT scaling rather than upsampling.
+   * This saves time if the upsampler gets to use 1:1 scaling.
+   * Note that this code only adapts subsampling ratios that are powers of 2.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    int ssize = 1;		/* power-of-2 extra IDCT scale being tried */
+    while (cinfo->min_DCT_h_scaled_size * ssize <=
+	   (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) &&
+	   (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) == 0) {
+      ssize = ssize * 2;
+    }
+    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize;
+    ssize = 1;			/* repeat the search for the vertical direction */
+    while (cinfo->min_DCT_v_scaled_size * ssize <=
+	   (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) &&
+	   (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) == 0) {
+      ssize = ssize * 2;
+    }
+    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize;
+
+    /* We don't support IDCT ratios larger than 2: clamp the larger side. */
+    if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2)
+	compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2;
+    else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2)
+	compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2;
+  }
+
+  /* Recompute downsampled dimensions of components;
+   * application needs to know these if using raw downsampled data.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Size in samples, after IDCT scaling */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width *
+		    (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size),
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height *
+		    (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
+		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+  }
+
+#endif /* IDCT_SCALING_SUPPORTED */
+
+  /* Report number of components in selected colorspace. */
+  /* Probably this should be in the color conversion module... */
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+    cinfo->out_color_components = 1;
+    break;
+  case JCS_RGB:
+  case JCS_BG_RGB:
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    break;
+  case JCS_YCbCr:
+  case JCS_BG_YCC:
+    cinfo->out_color_components = 3;
+    break;
+  case JCS_CMYK:
+  case JCS_YCCK:
+    cinfo->out_color_components = 4;
+    break;
+  default:			/* else must be same colorspace as in file */
+    cinfo->out_color_components = cinfo->num_components;
+    break;
+  }
+  /* With color quantization each output pixel is a single value */
+  cinfo->output_components = (cinfo->quantize_colors ? 1 :
+			      cinfo->out_color_components);
+
+  /* See if upsampler will want to emit more than one row at a time */
+  if (use_merged_upsample(cinfo))
+    cinfo->rec_outbuf_height = cinfo->max_v_samp_factor;
+  else
+    cinfo->rec_outbuf_height = 1;
+}
+
+
+/*
+ * Several decompression processes need to range-limit values to the range
+ * 0..MAXJSAMPLE; the input value may fall somewhat outside this range
+ * due to noise introduced by quantization, roundoff error, etc.  These
+ * processes are inner loops and need to be as fast as possible.  On most
+ * machines, particularly CPUs with pipelines or instruction prefetch,
+ * a (subscript-check-less) C table lookup
+ *		x = sample_range_limit[x];
+ * is faster than explicit tests
+ *		if (x < 0)  x = 0;
+ *		else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
+ * These processes all use a common table prepared by the routine below.
+ *
+ * For most steps we can mathematically guarantee that the initial value
+ * of x is within 2*(MAXJSAMPLE+1) of the legal range, so a table running
+ * from -2*(MAXJSAMPLE+1) to 3*MAXJSAMPLE+2 is sufficient.  But for the
+ * initial limiting step (just after the IDCT), a wildly out-of-range value
+ * is possible if the input data is corrupt.  To avoid any chance of indexing
+ * off the end of memory and getting a bad-pointer trap, we perform the
+ * post-IDCT limiting thus:
+ *		x = (sample_range_limit - SUBSET)[(x + CENTER) & MASK];
+ * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit
+ * samples.  Under normal circumstances this is more than enough range and
+ * a correct output will be generated; with bogus input data the mask will
+ * cause wraparound, and we will safely generate a bogus-but-in-range output.
+ * For the post-IDCT step, we want to convert the data from signed to unsigned
+ * representation by adding CENTERJSAMPLE at the same time that we limit it.
+ * This is accomplished with SUBSET = CENTER - CENTERJSAMPLE.
+ *
+ * Note that the table is allocated in near data space on PCs; it's small
+ * enough and used often enough to justify this.
+ */
+
+LOCAL(void)
+prepare_range_limit_table (j_decompress_ptr cinfo)
+/* Allocate and fill in the sample_range_limit table */
+{
+  JSAMPLE * table;		/* start of allocation, then the zero point */
+  int i;
+
+  table = (JSAMPLE *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo,
+    JPOOL_IMAGE, (RANGE_CENTER * 2 + MAXJSAMPLE + 1) * SIZEOF(JSAMPLE));
+  /* First segment of range limit table: limit[x] = 0 for x < 0 */
+  MEMZERO(table, RANGE_CENTER * SIZEOF(JSAMPLE));
+  table += RANGE_CENTER;	/* allow negative subscripts of table */
+  cinfo->sample_range_limit = table;
+  /* Main part of range limit table: limit[x] = x */
+  for (i = 0; i <= MAXJSAMPLE; i++)
+    table[i] = (JSAMPLE) i;
+  /* End of range limit table: limit[x] = MAXJSAMPLE for x > MAXJSAMPLE */
+  for (; i <=  MAXJSAMPLE + RANGE_CENTER; i++)	/* RANGE_CENTER more entries */
+    table[i] = MAXJSAMPLE;
+}
+
+
+/*
+ * Master selection of decompression modules.
+ * This is done once at jpeg_start_decompress time.  We determine
+ * which modules will be used and give them appropriate initialization calls.
+ * We also initialize the decompressor input side to begin consuming data.
+ *
+ * Since jpeg_read_header has finished, we know what is in the SOF
+ * and (first) SOS markers.  We also have all the application parameter
+ * settings.
+ */
+
+LOCAL(void)
+master_selection (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+  boolean use_c_buffer;		/* set for multiscan input or buffered-image mode */
+  long samplesperrow;		/* output samples per row, computed as long */
+  JDIMENSION jd_samplesperrow;	/* same value narrowed, to detect overflow */
+
+  /* For now, precision must match compiled-in value... */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Initialize dimensions and other stuff */
+  jpeg_calc_output_dimensions(cinfo);
+  prepare_range_limit_table(cinfo);
+
+  /* Sanity check on image dimensions */
+  if (cinfo->output_height <= 0 || cinfo->output_width <= 0 ||
+      cinfo->out_color_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  /* Width of an output scanline must be representable as JDIMENSION. */
+  samplesperrow = (long) cinfo->output_width * (long) cinfo->out_color_components;
+  jd_samplesperrow = (JDIMENSION) samplesperrow;
+  if ((long) jd_samplesperrow != samplesperrow)	/* value changed by the cast */
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* Initialize my private state */
+  master->pass_number = 0;
+  master->using_merged_upsample = use_merged_upsample(cinfo);
+
+  /* Color quantizer selection */
+  master->quantizer_1pass = NULL;
+  master->quantizer_2pass = NULL;
+  /* No mode changes if not using buffered-image mode. */
+  if (! cinfo->quantize_colors || ! cinfo->buffered_image) {
+    cinfo->enable_1pass_quant = FALSE;
+    cinfo->enable_external_quant = FALSE;
+    cinfo->enable_2pass_quant = FALSE;
+  }
+  if (cinfo->quantize_colors) {
+    if (cinfo->raw_data_out)
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    /* 2-pass quantizer only works in 3-component color space. */
+    if (cinfo->out_color_components != 3) {
+      cinfo->enable_1pass_quant = TRUE;
+      cinfo->enable_external_quant = FALSE;
+      cinfo->enable_2pass_quant = FALSE;
+      cinfo->colormap = NULL;
+    } else if (cinfo->colormap != NULL) {	/* application supplied a map */
+      cinfo->enable_external_quant = TRUE;
+    } else if (cinfo->two_pass_quantize) {
+      cinfo->enable_2pass_quant = TRUE;
+    } else {
+      cinfo->enable_1pass_quant = TRUE;
+    }
+
+    if (cinfo->enable_1pass_quant) {
+#ifdef QUANT_1PASS_SUPPORTED
+      jinit_1pass_quantizer(cinfo);
+      master->quantizer_1pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+
+    /* We use the 2-pass code to map to external colormaps. */
+    if (cinfo->enable_2pass_quant || cinfo->enable_external_quant) {
+#ifdef QUANT_2PASS_SUPPORTED
+      jinit_2pass_quantizer(cinfo);
+      master->quantizer_2pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+    /* If both quantizers are initialized, the 2-pass one is left active;
+     * this is necessary for starting with quantization to an external map.
+     */
+  }
+
+  /* Post-processing: in particular, color conversion first */
+  if (! cinfo->raw_data_out) {
+    if (master->using_merged_upsample) {
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+      jinit_merged_upsampler(cinfo); /* does color conversion too */
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else {
+      jinit_color_deconverter(cinfo);
+      jinit_upsampler(cinfo);
+    }
+    jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
+  }
+  /* Inverse DCT */
+  jinit_inverse_dct(cinfo);
+  /* Entropy decoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code)
+    jinit_arith_decoder(cinfo);
+  else {
+    jinit_huff_decoder(cinfo);
+  }
+
+  /* Initialize principal buffer controllers. */
+  use_c_buffer = cinfo->inputctl->has_multiple_scans || cinfo->buffered_image;
+  jinit_d_coef_controller(cinfo, use_c_buffer);
+
+  if (! cinfo->raw_data_out)
+    jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Initialize input side of decompressor to consume first scan. */
+  (*cinfo->inputctl->start_input_pass) (cinfo);
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* If jpeg_start_decompress will read the whole file, initialize
+   * progress monitoring appropriately.  The input step is counted
+   * as one pass.
+   */
+  if (cinfo->progress != NULL && ! cinfo->buffered_image &&
+      cinfo->inputctl->has_multiple_scans) {
+    int nscans;			/* estimated scan count, used only for pass_limit */
+    /* Estimate number of scans to set pass_limit. */
+    if (cinfo->progressive_mode) {
+      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
+      nscans = 2 + 3 * cinfo->num_components;
+    } else {
+      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
+      nscans = cinfo->num_components;
+    }
+    cinfo->progress->pass_counter = 0L;
+    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
+    cinfo->progress->completed_passes = 0;
+    cinfo->progress->total_passes = (cinfo->enable_2pass_quant ? 3 : 2);
+    /* Count the input pass as done */
+    master->pass_number++;
+  }
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+}
+
+
+/*
+ * Per-pass setup.
+ * This is called at the beginning of each output pass.  We determine which
+ * modules will be active during this pass and give them appropriate
+ * start_pass calls.  We also set is_dummy_pass to indicate whether this
+ * is a "real" output pass or a dummy pass for color quantization.
+ * (In the latter case, jdapistd.c will crank the pass to completion.)
+ */
+
+METHODDEF(void)
+prepare_for_output_pass (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  if (master->pub.is_dummy_pass) {	/* previous pass was the dummy one */
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Final pass of 2-pass quantization */
+    master->pub.is_dummy_pass = FALSE;
+    (*cinfo->cquantize->start_pass) (cinfo, FALSE);
+    (*cinfo->post->start_pass) (cinfo, JBUF_CRANK_DEST);
+    (*cinfo->main->start_pass) (cinfo, JBUF_CRANK_DEST);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  } else {
+    if (cinfo->quantize_colors && cinfo->colormap == NULL) {
+      /* Select new quantization method */
+      if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
+	cinfo->cquantize = master->quantizer_2pass;
+	master->pub.is_dummy_pass = TRUE;	/* this pass produces no output */
+      } else if (cinfo->enable_1pass_quant) {
+	cinfo->cquantize = master->quantizer_1pass;
+      } else {
+	ERREXIT(cinfo, JERR_MODE_CHANGE);
+      }
+    }
+    (*cinfo->idct->start_pass) (cinfo);
+    (*cinfo->coef->start_output_pass) (cinfo);
+    if (! cinfo->raw_data_out) {
+      if (! master->using_merged_upsample)	/* no separate converter if merged */
+	(*cinfo->cconvert->start_pass) (cinfo);
+      (*cinfo->upsample->start_pass) (cinfo);
+      if (cinfo->quantize_colors)
+	(*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
+      (*cinfo->post->start_pass) (cinfo,
+	    (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+      (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
+    }
+  }
+
+  /* Set up progress monitor's pass info if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->completed_passes = master->pass_number;
+    cinfo->progress->total_passes = master->pass_number +
+				    (master->pub.is_dummy_pass ? 2 : 1);
+    /* In buffered-image mode, we assume one more output pass if EOI not
+     * yet reached, but no more passes if EOI has been reached.
+     */
+    if (cinfo->buffered_image && ! cinfo->inputctl->eoi_reached) {
+      cinfo->progress->total_passes += (cinfo->enable_2pass_quant ? 2 : 1);
+    }
+  }
+}
+
+
+/*
+ * Finish up at end of an output pass.
+ */
+
+METHODDEF(void)
+finish_output_pass (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  if (cinfo->quantize_colors)	/* let the active quantizer wrap up its pass */
+    (*cinfo->cquantize->finish_pass) (cinfo);
+  master->pass_number++;	/* one more pass completed */
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+GLOBAL(void)
+jpeg_new_colormap (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  /* Prevent application from calling me at wrong times */
+  if (cinfo->global_state != DSTATE_BUFIMAGE)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (cinfo->quantize_colors && cinfo->enable_external_quant &&
+      cinfo->colormap != NULL) {
+    /* Select 2-pass quantizer for external colormap use */
+    cinfo->cquantize = master->quantizer_2pass;
+    /* Notify quantizer of colormap change */
+    (*cinfo->cquantize->new_color_map) (cinfo);
+    master->pub.is_dummy_pass = FALSE; /* just in case */
+  } else	/* quantization off, external maps not enabled, or no colormap set */
+    ERREXIT(cinfo, JERR_MODE_CHANGE);
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+/*
+ * Initialize master decompression control and select active modules.
+ * This is performed at the start of jpeg_start_decompress.
+ */
+
+GLOBAL(void)
+jinit_master_decompress (j_decompress_ptr cinfo)
+{
+  my_master_ptr master;
+
+  master = (my_master_ptr)	/* allocated from the JPOOL_IMAGE pool */
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(my_decomp_master));
+  cinfo->master = &master->pub;
+  master->pub.prepare_for_output_pass = prepare_for_output_pass;
+  master->pub.finish_output_pass = finish_output_pass;
+
+  master->pub.is_dummy_pass = FALSE;	/* first output pass is not a dummy */
+
+  master_selection(cinfo);	/* pick and initialize the active modules */
+}
diff --git a/libraries/jpeg/jdmerge.c b/libraries/jpeg/jdmerge.c
new file mode 100644
index 000000000..866693f52
--- /dev/null
+++ b/libraries/jpeg/jdmerge.c
@@ -0,0 +1,451 @@
+/*
+ * jdmerge.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2013-2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains code for merged upsampling/color conversion.
+ *
+ * This file combines functions from jdsample.c and jdcolor.c;
+ * read those files first to understand what's going on.
+ *
+ * When the chroma components are to be upsampled by simple replication
+ * (ie, box filtering), we can save some work in color conversion by
+ * calculating all the output pixels corresponding to a pair of chroma
+ * samples at one time.  In the conversion equations
+ *	R = Y           + K1 * Cr
+ *	G = Y + K2 * Cb + K3 * Cr
+ *	B = Y + K4 * Cb
+ * only the Y term varies among the group of pixels corresponding to a pair
+ * of chroma samples, so the rest of the terms can be calculated just once.
+ * At typical sampling ratios, this eliminates half or three-quarters of the
+ * multiplications needed for color conversion.
+ *
+ * This file currently provides implementations for the following cases:
+ *	YCC => RGB color conversion only (YCbCr or BG_YCC).
+ *	Sampling ratios of 2h1v or 2h2v.
+ *	No scaling needed at upsample time.
+ *	Corner-aligned (non-CCIR601) sampling alignment.
+ * Other special cases could be added, but in most applications these are
+ * the only common cases.  (For uncommon cases we fall back on the more
+ * general code in jdsample.c and jdcolor.c.)
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+
+
+#if RANGE_BITS < 2
+  /* Deliberate syntax err */
+  Sorry, this code requires 2 or more range extension bits.
+#endif
+
+
+/* Private subobject: state for merged upsampling + color conversion */
+
+typedef struct {
+  struct jpeg_upsampler pub;	/* public fields */
+
+  /* Pointer to routine to do actual upsampling/conversion of one row group */
+  JMETHOD(void, upmethod, (j_decompress_ptr cinfo,
+			   JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+			   JSAMPARRAY output_buf));
+
+  /* Private state for YCC->RGB conversion (tables of MAXJSAMPLE+1 entries) */
+  int * Cr_r_tab;		/* => table for Cr to R conversion */
+  int * Cb_b_tab;		/* => table for Cb to B conversion */
+  INT32 * Cr_g_tab;		/* => table for Cr to G conversion (scaled up) */
+  INT32 * Cb_g_tab;		/* => table for Cb to G conversion (scaled up) */
+
+  /* For 2:1 vertical sampling, we produce two output rows at a time.
+   * We need a "spare" row buffer to hold the second output row if the
+   * application provides just a one-row buffer; we also use the spare
+   * to discard the dummy last row if the image height is odd.
+   */
+  JSAMPROW spare_row;		/* NULL when no spare row was allocated */
+  boolean spare_full;		/* T if spare buffer is occupied */
+
+  JDIMENSION out_row_width;	/* samples per output row */
+  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))	/* 0.5 scaled by 2^SCALEBITS */
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))	/* x scaled by 2^SCALEBITS */
+
+
+/*
+ * Initialize tables for YCbCr->RGB and BG_YCC->RGB colorspace conversion.
+ * This is taken directly from jdcolor.c; see that file for more info.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table (j_decompress_ptr cinfo)
+/* Normal case, sYCC: allocates and fills the four conversion tables */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  upsample->Cr_r_tab = (int *)	/* all four tables come from JPOOL_IMAGE */
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cb_b_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cr_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+  upsample->Cb_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 1.402 * x */
+    upsample->Cr_r_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(1.402) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.772 * x */
+    upsample->Cb_b_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(1.772) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.714136286 * x (left unshifted here) */
+    upsample->Cr_g_tab[i] = (- FIX(0.714136286)) * x;
+    /* Cb=>G value is scaled-up -0.344136286 * x */
+    /* We also add in ONE_HALF so that we need not do it in the inner loop */
+    upsample->Cb_g_tab[i] = (- FIX(0.344136286)) * x + ONE_HALF;
+  }
+}
+
+
+LOCAL(void)
+build_bg_ycc_rgb_table (j_decompress_ptr cinfo)
+/* Wide gamut case, bg-sYCC: same layout, coefficients twice the sYCC ones */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  upsample->Cr_r_tab = (int *)	/* all four tables come from JPOOL_IMAGE */
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cb_b_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cr_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+  upsample->Cb_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 2.804 * x */
+    upsample->Cr_r_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(2.804) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 3.544 * x */
+    upsample->Cb_b_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(3.544) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -1.428272572 * x (left unshifted here) */
+    upsample->Cr_g_tab[i] = (- FIX(1.428272572)) * x;
+    /* Cb=>G value is scaled-up -0.688272572 * x */
+    /* We also add in ONE_HALF so that we need not do it in the inner loop */
+    upsample->Cb_g_tab[i] = (- FIX(0.688272572)) * x + ONE_HALF;
+  }
+}
+
+
+/*
+ * Initialize for an upsampling pass: reset spare-row flag and row counter.
+ */
+
+METHODDEF(void)
+start_pass_merged_upsample (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  /* Mark the spare buffer empty */
+  upsample->spare_full = FALSE;
+  /* Initialize total-height counter for detecting bottom of image */
+  upsample->rows_to_go = cinfo->output_height;
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ *
+ * The control routine just handles the row buffering considerations.
+ */
+
+METHODDEF(void)
+merged_2v_upsample (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,	/* not referenced here */
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+/* 2:1 vertical sampling case: may need a spare row. */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  JSAMPROW work_ptrs[2];	/* the two destination rows passed to upmethod */
+  JDIMENSION num_rows;		/* number of rows returned to caller */
+
+  if (upsample->spare_full) {
+    /* If we have a spare row saved from a previous cycle, just return it. */
+    jcopy_sample_rows(& upsample->spare_row, 0, output_buf + *out_row_ctr, 0,
+		      1, upsample->out_row_width);
+    num_rows = 1;
+    upsample->spare_full = FALSE;
+  } else {
+    /* Figure number of rows to return to caller. */
+    num_rows = 2;
+    /* Not more than the distance to the end of the image. */
+    if (num_rows > upsample->rows_to_go)
+      num_rows = upsample->rows_to_go;
+    /* And not more than what the client can accept: */
+    out_rows_avail -= *out_row_ctr;	/* now the count of unfilled output rows */
+    if (num_rows > out_rows_avail)
+      num_rows = out_rows_avail;
+    /* Create output pointer array for upsampler. */
+    work_ptrs[0] = output_buf[*out_row_ctr];
+    if (num_rows > 1) {
+      work_ptrs[1] = output_buf[*out_row_ctr + 1];
+    } else {
+      work_ptrs[1] = upsample->spare_row;	/* second row goes to the spare */
+      upsample->spare_full = TRUE;
+    }
+    /* Now do the upsampling. */
+    (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, work_ptrs);
+  }
+
+  /* Adjust counts */
+  *out_row_ctr += num_rows;
+  upsample->rows_to_go -= num_rows;
+  /* When the buffer is emptied, declare this input row group consumed */
+  if (! upsample->spare_full)
+    (*in_row_group_ctr)++;
+}
+
+
+METHODDEF(void)
+merged_1v_upsample (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,	/* not referenced here */
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)	/* not referenced here */
+/* 1:1 vertical sampling case: much easier, never need a spare row. */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  /* Just do the upsampling. */
+  (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
+			 output_buf + *out_row_ctr);
+  /* Adjust counts: one output row and one input row group per call */
+  (*out_row_ctr)++;
+  (*in_row_group_ctr)++;
+}
+
+
+/*
+ * These are the routines invoked by the control routines to do
+ * the actual upsampling/conversion.  One row group is processed per call.
+ *
+ * Note: since we may be writing directly into application-supplied buffers,
+ * we have to be honest about the output width; we can't assume the buffer
+ * has been rounded up to an even width.
+ */
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+ */
+
+METHODDEF(void)
+h2v1_merged_upsample (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+		      JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;	/* chroma samples, used as indexes into the tables */
+  register JSAMPROW outptr;
+  JSAMPROW inptr0, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr = output_buf[0];	/* only one output row is written per call */
+  /* Loop for each pair of output pixels (output_width / 2 iterations) */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation (shared by both pixels) */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred   = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue  = Cbbtab[cb];
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED]   = range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE]  = range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED]   = range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE]  = range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);	/* no pointer increments: final column */
+    cr = GETJSAMPLE(*inptr2);
+    cred   = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue  = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr0);
+    outptr[RGB_RED]   = range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE]  = range_limit[y + cblue];
+  }
+}
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+ */
+
+METHODDEF(void)
+h2v2_merged_upsample (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+		      JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;	/* chroma samples, used as indexes into the tables */
+  register JSAMPROW outptr0, outptr1;
+  JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr*2];	/* first Y row of the group */
+  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];	/* second Y row of the group */
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+  /* Loop for each group of output pixels (output_width / 2 iterations) */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation (shared by all four pixels) */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred   = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue  = Cbbtab[cb];
+    /* Fetch 4 Y values and emit 4 pixels */
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED]   = range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE]  = range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED]   = range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE]  = range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED]   = range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE]  = range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED]   = range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE]  = range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);	/* no pointer increments: final column */
+    cr = GETJSAMPLE(*inptr2);
+    cred   = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue  = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr00);
+    outptr0[RGB_RED]   = range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE]  = range_limit[y + cblue];
+    y  = GETJSAMPLE(*inptr01);
+    outptr1[RGB_RED]   = range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE]  = range_limit[y + cblue];
+  }
+}
+
+
+/*
+ * Module initialization routine for merged upsampling/color conversion.
+ *
+ * NB: this is called under the conditions determined by use_merged_upsample()
+ * in jdmaster.c.  That routine MUST correspond to the actual capabilities
+ * of this module; no safety checks are made here.
+ */
+
+GLOBAL(void)
+jinit_merged_upsampler (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample;
+
+  upsample = (my_upsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_upsampler));
+  cinfo->upsample = &upsample->pub;	/* install as the decompressor's upsampler */
+  upsample->pub.start_pass = start_pass_merged_upsample;
+  upsample->pub.need_context_rows = FALSE;
+
+  upsample->out_row_width = cinfo->output_width * cinfo->out_color_components;
+
+  if (cinfo->max_v_samp_factor == 2) {	/* 2:1 vertical: two rows per call */
+    upsample->pub.upsample = merged_2v_upsample;
+    upsample->upmethod = h2v2_merged_upsample;
+    /* Allocate a spare row buffer */
+    upsample->spare_row = (JSAMPROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+		(size_t) (upsample->out_row_width * SIZEOF(JSAMPLE)));
+  } else {	/* otherwise treated as 1:1 vertical */
+    upsample->pub.upsample = merged_1v_upsample;
+    upsample->upmethod = h2v1_merged_upsample;
+    /* No spare row needed */
+    upsample->spare_row = NULL;
+  }
+
+  if (cinfo->jpeg_color_space == JCS_BG_YCC)	/* pick matching table builder */
+    build_bg_ycc_rgb_table(cinfo);
+  else
+    build_ycc_rgb_table(cinfo);
+}
+
+#endif /* UPSAMPLE_MERGING_SUPPORTED */
diff --git a/libraries/jpeg/jdpostct.c b/libraries/jpeg/jdpostct.c
new file mode 100644
index 000000000..571563d72
--- /dev/null
+++ b/libraries/jpeg/jdpostct.c
@@ -0,0 +1,290 @@
+/*
+ * jdpostct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the decompression postprocessing controller.
+ * This controller manages the upsampling, color conversion, and color
+ * quantization/reduction steps; specifically, it controls the buffering
+ * between upsample/color conversion and color quantization/reduction.
+ *
+ * If no color quantization/reduction is required, then this module has no
+ * work to do, and it just hands off to the upsample/color conversion code.
+ * An integrated upsample/convert/quantize process would replace this module
+ * entirely.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private buffer controller object (reached through cinfo->post) */
+
+typedef struct {
+  struct jpeg_d_post_controller pub; /* public fields */
+
+  /* Color quantization source buffer: this holds output data from
+   * the upsample/color conversion step to be passed to the quantizer.
+   * For two-pass color quantization, we need a full-image buffer;
+   * for one-pass operation, a strip buffer is sufficient.
+   */
+  jvirt_sarray_ptr whole_image;	/* virtual array, or NULL if one-pass */
+  JSAMPARRAY buffer;		/* strip buffer, or current strip of virtual */
+  JDIMENSION strip_height;	/* buffer size in rows */
+  /* for two-pass mode only: */
+  JDIMENSION starting_row;	/* row # of first row in current strip */
+  JDIMENSION next_row;		/* index of next row to fill/empty in strip */
+} my_post_controller;
+
+typedef my_post_controller * my_post_ptr;
+
+
+/* Forward declarations of the routines start_pass_dpost can install */
+METHODDEF(void) post_process_1pass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+#ifdef QUANT_2PASS_SUPPORTED
+METHODDEF(void) post_process_prepass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+METHODDEF(void) post_process_2pass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+#endif
+
+
+/*
+ * Initialize for a processing pass: pick post_process_data for pass_mode.
+ */
+
+METHODDEF(void)
+start_pass_dpost (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (cinfo->quantize_colors) {
+      /* Single-pass processing with color quantization. */
+      post->pub.post_process_data = post_process_1pass;
+      /* We could be doing buffered-image output before starting a 2-pass
+       * color quantization; in that case, jinit_d_post_controller did not
+       * allocate a strip buffer.  Use the virtual-array buffer as workspace.
+       */
+      if (post->buffer == NULL) {
+	post->buffer = (*cinfo->mem->access_virt_sarray)
+	  ((j_common_ptr) cinfo, post->whole_image,
+	   (JDIMENSION) 0, post->strip_height, TRUE);
+      }
+    } else {
+      /* For single-pass processing without color quantization,
+       * I have no work to do; just call the upsampler directly.
+       */
+      post->pub.post_process_data = cinfo->upsample->upsample;
+    }
+    break;
+#ifdef QUANT_2PASS_SUPPORTED
+  case JBUF_SAVE_AND_PASS:
+    /* First pass of 2-pass quantization; requires the full-image array */
+    if (post->whole_image == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    post->pub.post_process_data = post_process_prepass;
+    break;
+  case JBUF_CRANK_DEST:
+    /* Second pass of 2-pass quantization; requires the full-image array */
+    if (post->whole_image == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    post->pub.post_process_data = post_process_2pass;
+    break;
+#endif /* QUANT_2PASS_SUPPORTED */
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+  post->starting_row = post->next_row = 0;	/* restart strip bookkeeping */
+}
+
+
+/*
+ * Process some data in the one-pass (strip buffer) case.
+ * This is used for color precision reduction as well as one-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_1pass (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION num_rows, max_rows;
+
+  /* Fill the buffer, but not more than what we can dump out in one go. */
+  /* Note we rely on the upsampler to detect bottom of image. */
+  max_rows = out_rows_avail - *out_row_ctr;
+  if (max_rows > post->strip_height)
+    max_rows = post->strip_height;
+  num_rows = 0;	/* passed to the upsampler as its output row counter */
+  (*cinfo->upsample->upsample) (cinfo,
+		input_buf, in_row_group_ctr, in_row_groups_avail,
+		post->buffer, &num_rows, max_rows);
+  /* Quantize and emit data (num_rows rows, from the strip to output_buf). */
+  (*cinfo->cquantize->color_quantize) (cinfo,
+		post->buffer, output_buf + *out_row_ctr, (int) num_rows);
+  *out_row_ctr += num_rows;
+}
+
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+/*
+ * Process some data in the first pass of 2-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_prepass (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		      JDIMENSION in_row_groups_avail,
+		      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		      JDIMENSION out_rows_avail)	/* output_buf, out_rows_avail unused */
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION old_next_row, num_rows;
+
+  /* Reposition virtual buffer if at start of strip. */
+  if (post->next_row == 0) {
+    post->buffer = (*cinfo->mem->access_virt_sarray)
+	((j_common_ptr) cinfo, post->whole_image,
+	 post->starting_row, post->strip_height, TRUE);
+  }
+
+  /* Upsample some data (up to a strip height's worth). */
+  old_next_row = post->next_row;	/* remember where the new rows start */
+  (*cinfo->upsample->upsample) (cinfo,
+		input_buf, in_row_group_ctr, in_row_groups_avail,
+		post->buffer, &post->next_row, post->strip_height);
+
+  /* Allow quantizer to scan new data.  No data is emitted, */
+  /* but we advance out_row_ctr so outer loop can tell when we're done. */
+  if (post->next_row > old_next_row) {
+    num_rows = post->next_row - old_next_row;
+    (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row,
+					 (JSAMPARRAY) NULL, (int) num_rows);
+    *out_row_ctr += num_rows;
+  }
+
+  /* Advance if we filled the strip. */
+  if (post->next_row >= post->strip_height) {
+    post->starting_row += post->strip_height;
+    post->next_row = 0;
+  }
+}
+
+
+/*
+ * Process some data in the second pass of 2-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_2pass (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,	/* the three input args are unused */
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION num_rows, max_rows;
+
+  /* Reposition virtual buffer if at start of strip. */
+  if (post->next_row == 0) {
+    post->buffer = (*cinfo->mem->access_virt_sarray)
+	((j_common_ptr) cinfo, post->whole_image,
+	 post->starting_row, post->strip_height, FALSE);
+  }
+
+  /* Determine number of rows to emit. */
+  num_rows = post->strip_height - post->next_row; /* available in strip */
+  max_rows = out_rows_avail - *out_row_ctr; /* available in output area */
+  if (num_rows > max_rows)
+    num_rows = max_rows;
+  /* We have to check bottom of image here, can't depend on upsampler. */
+  max_rows = cinfo->output_height - post->starting_row;
+  if (num_rows > max_rows)
+    num_rows = max_rows;
+
+  /* Quantize and emit data (read from the strip, written to output_buf). */
+  (*cinfo->cquantize->color_quantize) (cinfo,
+		post->buffer + post->next_row, output_buf + *out_row_ctr,
+		(int) num_rows);
+  *out_row_ctr += num_rows;
+
+  /* Advance if we filled the strip. */
+  post->next_row += num_rows;
+  if (post->next_row >= post->strip_height) {
+    post->starting_row += post->strip_height;
+    post->next_row = 0;
+  }
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize postprocessing controller.
+ */
+
+GLOBAL(void)
+jinit_d_post_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_post_ptr post;
+
+  post = (my_post_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_post_controller));
+  cinfo->post = (struct jpeg_d_post_controller *) post;
+  post->pub.start_pass = start_pass_dpost;
+  post->whole_image = NULL;	/* flag for no virtual arrays */
+  post->buffer = NULL;		/* flag for no strip buffer */
+
+  /* Create the quantization buffer, if needed (none when not quantizing) */
+  if (cinfo->quantize_colors) {
+    /* The buffer strip height is max_v_samp_factor, which is typically
+     * an efficient number of rows for upsampling to return.
+     * (In the presence of output rescaling, we might want to be smarter?)
+     */
+    post->strip_height = (JDIMENSION) cinfo->max_v_samp_factor;
+    if (need_full_buffer) {
+      /* Two-pass color quantization: need full-image storage. */
+      /* We round up the number of rows to a multiple of the strip height. */
+#ifdef QUANT_2PASS_SUPPORTED
+      post->whole_image = (*cinfo->mem->request_virt_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+	 cinfo->output_width * cinfo->out_color_components,
+	 (JDIMENSION) jround_up((long) cinfo->output_height,
+				(long) post->strip_height),
+	 post->strip_height);
+#else
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif /* QUANT_2PASS_SUPPORTED */
+    } else {
+      /* One-pass color quantization: just make a strip buffer. */
+      post->buffer = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 cinfo->output_width * cinfo->out_color_components,
+	 post->strip_height);
+    }
+  }
+}
diff --git a/libraries/jpeg/jdsample.c b/libraries/jpeg/jdsample.c
new file mode 100644
index 000000000..fd9907e20
--- /dev/null
+++ b/libraries/jpeg/jdsample.c
@@ -0,0 +1,358 @@
+/*
+ * jdsample.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2002-2015 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains upsampling routines.
+ *
+ * Upsampling input data is counted in "row groups".  A row group
+ * is defined to be (v_samp_factor * DCT_v_scaled_size / min_DCT_v_scaled_size)
+ * sample rows of each component.  Upsampling will normally produce
+ * max_v_samp_factor pixel rows from each row group (but this could vary
+ * if the upsampler is applying a scale factor of its own).
+ *
+ * An excellent reference for image resampling is
+ *   Digital Image Warping, George Wolberg, 1990.
+ *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Pointer to a routine that upsamples one row group of a single component */
+typedef JMETHOD(void, upsample1_ptr,
+		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
+
+/* Private subobject: upsampler state, reached through cinfo->upsample */
+
+typedef struct {
+  struct jpeg_upsampler pub;	/* public fields */
+
+  /* Color conversion buffer.  When using separate upsampling and color
+   * conversion steps, this buffer holds one upsampled row group until it
+   * has been color converted and output.
+   * Note: we do not allocate any storage for component(s) which are full-size,
+   * ie do not need rescaling.  The corresponding entry of color_buf[] is
+   * simply set to point to the input data array, thereby avoiding copying.
+   */
+  JSAMPARRAY color_buf[MAX_COMPONENTS];
+
+  /* Per-component upsampling method pointers, chosen by jinit_upsampler */
+  upsample1_ptr methods[MAX_COMPONENTS];
+
+  int next_row_out;		/* counts rows emitted from color_buf */
+  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+
+  /* Height (in sample rows) of an input row group for each component. */
+  int rowgroup_height[MAX_COMPONENTS];
+
+  /* These arrays save pixel expansion factors so that int_expand need not
+   * recompute them each time.  They are unused for other upsampling methods.
+   */
+  UINT8 h_expand[MAX_COMPONENTS];
+  UINT8 v_expand[MAX_COMPONENTS];
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;
+
+
+/*
+ * Initialize for an upsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_upsample (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr up = (my_upsample_ptr) cinfo->upsample;
+
+  /* Bottom-of-image detection starts with every output row still to go */
+  up->rows_to_go = cinfo->output_height;
+  /* A "full" row counter marks the conversion buffer as empty */
+  up->next_row_out = cinfo->max_v_samp_factor;
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ * Each component of one row group is upsampled independently into the
+ * conversion buffer; buffered rows are then color converted into output_buf.
+ * *out_row_ctr advances by the rows emitted (never beyond out_rows_avail);
+ * *in_row_group_ctr advances only once the buffer has been fully emitted.
+ */
+
+METHODDEF(void)
+sep_upsample (j_decompress_ptr cinfo,
+	      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	      JDIMENSION in_row_groups_avail,
+	      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	      JDIMENSION out_rows_avail)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  int ci;
+  jpeg_component_info * compptr;
+  JDIMENSION num_rows;
+
+  /* Fill the conversion buffer, if it's empty */
+  if (upsample->next_row_out >= cinfo->max_v_samp_factor) {
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      /* Invoke per-component upsample method.  Notice we pass a POINTER
+       * to color_buf[ci], so that fullsize_upsample can change it.
+       */
+      (*upsample->methods[ci]) (cinfo, compptr,
+	input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]),
+	upsample->color_buf + ci);
+    }
+    upsample->next_row_out = 0;	/* buffer now holds a fresh row group */
+  }
+
+  /* Color-convert and emit rows */
+
+  /* How many we have in the buffer: */
+  num_rows = (JDIMENSION) (cinfo->max_v_samp_factor - upsample->next_row_out);
+  /* Not more than the distance to the end of the image.  Need this test
+   * in case the image height is not a multiple of max_v_samp_factor:
+   */
+  if (num_rows > upsample->rows_to_go) 
+    num_rows = upsample->rows_to_go;
+  /* And not more than what the client can accept: */
+  out_rows_avail -= *out_row_ctr;	/* rows still free in output_buf */
+  if (num_rows > out_rows_avail)
+    num_rows = out_rows_avail;
+
+  (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf,
+				     (JDIMENSION) upsample->next_row_out,
+				     output_buf + *out_row_ctr,
+				     (int) num_rows);
+
+  /* Adjust counts */
+  *out_row_ctr += num_rows;
+  upsample->rows_to_go -= num_rows;
+  upsample->next_row_out += num_rows;
+  /* When the buffer is emptied, declare this input row group consumed */
+  if (upsample->next_row_out >= cinfo->max_v_samp_factor)
+    (*in_row_group_ctr)++;
+}
+
+
+/*
+ * These are the routines invoked by sep_upsample to upsample pixel values
+ * of a single component.  One row group is processed per call.
+ */
+
+
+/*
+ * For full-size components, we just make color_buf[ci] point at the
+ * input buffer, and thus avoid copying any data.  Note that this is
+ * safe only because sep_upsample doesn't declare the input row group
+ * "consumed" until we are done color converting and emitting it.
+ */
+
+METHODDEF(void)
+fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		   JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  output_data_ptr[0] = input_data;	/* alias the input rows; no copy */
+}
+
+
+/*
+ * This is a no-op version used for "uninteresting" components.
+ * These components will not be referenced by color conversion.
+ */
+
+METHODDEF(void)
+noop_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  output_data_ptr[0] = NULL;	/* safety: no row array for an unused slot */
+}
+
+
+/*
+ * This version handles any integral sampling ratios.
+ * This is not used for typical JPEG files, so it need not be fast.
+ * Nor, for that matter, is it particularly accurate: the algorithm is
+ * simple replication of the input pixel onto the corresponding output
+ * pixels.  The hi-falutin sampling literature refers to this as a
+ * "box filter".  A box filter tends to introduce visible artifacts,
+ * so if you are actually going to use 3:1 or 4:1 sampling ratios
+ * you would be well advised to improve this code.
+ */
+
+METHODDEF(void)
+int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	      JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  JSAMPARRAY dest_rows = *output_data_ptr;
+  register JSAMPROW src, dst;
+  register JSAMPLE sample;
+  register int rep;
+  JSAMPROW dst_end;
+  int h_factor, v_factor;
+  int src_row, dst_row;
+
+  /* Per-component replication factors saved in the upsampler object */
+  v_factor = upsample->v_expand[compptr->component_index];
+  h_factor = upsample->h_expand[compptr->component_index];
+
+  /* Every input row is turned into v_factor consecutive output rows */
+  for (src_row = 0, dst_row = 0; dst_row < cinfo->max_v_samp_factor;
+       src_row++, dst_row += v_factor) {
+    src = input_data[src_row];
+    dst = dest_rows[dst_row];
+    dst_end = dst + cinfo->output_width;
+    /* First row of the group: repeat each input sample h_factor times */
+    while (dst < dst_end) {
+      sample = *src++;	/* don't need GETJSAMPLE() here */
+      rep = h_factor;
+      while (rep-- > 0)
+	*dst++ = sample;
+    }
+    /* Remaining rows of the group are straight copies of the first */
+    if (v_factor > 1) {
+      jcopy_sample_rows(dest_rows, dst_row, dest_rows, dst_row+1,
+			v_factor-1, cinfo->output_width);
+    }
+  }
+}
+
+
+/*
+ * Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
+ * It's still a box filter.
+ */
+
+METHODDEF(void)
+h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY dest_rows = *output_data_ptr;
+  register JSAMPROW src, dst;
+  register JSAMPLE sample;
+  JSAMPROW dst_end;
+  int row = 0;
+
+  while (row < cinfo->max_v_samp_factor) {	/* rows map one-to-one */
+    src = input_data[row];
+    dst = dest_rows[row];
+    dst_end = dst + cinfo->output_width;
+    for (; dst < dst_end; dst += 2) {
+      sample = *src++;		/* don't need GETJSAMPLE() here */
+      dst[0] = dst[1] = sample;	/* each sample fills two adjacent outputs */
+    }
+    row++;
+  }
+}
+
+
+/*
+ * Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
+ * It's still a box filter.
+ */
+
+METHODDEF(void)
+h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY dest_rows = *output_data_ptr;
+  register JSAMPROW src, dst;
+  register JSAMPLE sample;
+  JSAMPROW dst_end;
+  int src_row, dst_row;
+
+  /* One input row produces a pair of output rows */
+  for (src_row = 0, dst_row = 0; dst_row < cinfo->max_v_samp_factor;
+       src_row++, dst_row += 2) {
+    src = input_data[src_row];
+    dst = dest_rows[dst_row];
+    dst_end = dst + cinfo->output_width;
+    /* Horizontal doubling: each sample fills two adjacent positions */
+    for (; dst < dst_end; dst += 2) {
+      sample = *src++;	/* don't need GETJSAMPLE() here */
+      dst[0] = dst[1] = sample;
+    }
+    /* Vertical doubling: duplicate the finished row into the next one */
+    jcopy_sample_rows(dest_rows, dst_row, dest_rows, dst_row+1,
+		      1, cinfo->output_width);
+  }
+}
+
+
+/* Module initialization routine for upsampling: allocates the upsampler,
+ * picks an upsample method per component and allocates conversion buffers.
+ */
+
+GLOBAL(void)
+jinit_upsampler (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample;
+  int ci;
+  jpeg_component_info * compptr;
+  int h_in_group, v_in_group, h_out_group, v_out_group;
+
+  upsample = (my_upsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_upsampler));
+  cinfo->upsample = &upsample->pub;
+  upsample->pub.start_pass = start_pass_upsample;
+  upsample->pub.upsample = sep_upsample;
+  upsample->pub.need_context_rows = FALSE; /* until we find out differently */
+
+  if (cinfo->CCIR601_sampling)	/* this isn't supported */
+    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
+
+  /* Verify we can handle the sampling factors, select per-component methods,
+   * and create storage as needed.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Compute size of an "input group" after IDCT scaling.  This many samples
+     * are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
+     */
+    h_in_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) /
+		 cinfo->min_DCT_h_scaled_size;
+    v_in_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+		 cinfo->min_DCT_v_scaled_size;
+    h_out_group = cinfo->max_h_samp_factor;
+    v_out_group = cinfo->max_v_samp_factor;
+    upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
+    if (! compptr->component_needed) {
+      /* Don't bother to upsample an uninteresting component. */
+      upsample->methods[ci] = noop_upsample;
+      continue;		/* don't need to allocate buffer */
+    }
+    if (h_in_group == h_out_group && v_in_group == v_out_group) {
+      /* Fullsize components can be processed without any work. */
+      upsample->methods[ci] = fullsize_upsample;
+      continue;		/* don't need to allocate buffer */
+    }
+    if (h_in_group * 2 == h_out_group && v_in_group == v_out_group) {
+      /* Special case for 2h1v upsampling */
+      upsample->methods[ci] = h2v1_upsample;
+    } else if (h_in_group * 2 == h_out_group &&
+	       v_in_group * 2 == v_out_group) {
+      /* Special case for 2h2v upsampling */
+      upsample->methods[ci] = h2v2_upsample;
+    } else if ((h_out_group % h_in_group) == 0 &&
+	       (v_out_group % v_in_group) == 0) {
+      /* Generic integral-factors upsampling method */
+      upsample->methods[ci] = int_upsample;
+      upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
+      upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
+    } else	/* output group is not an integral multiple of the input group */
+      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
+    upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       (JDIMENSION) jround_up((long) cinfo->output_width,
+			      (long) cinfo->max_h_samp_factor),
+       (JDIMENSION) cinfo->max_v_samp_factor);
+  }
+}
diff --git a/libraries/jpeg/jerror.c b/libraries/jpeg/jerror.c
new file mode 100644
index 000000000..7163af699
--- /dev/null
+++ b/libraries/jpeg/jerror.c
@@ -0,0 +1,253 @@
+/*
+ * jerror.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2012-2015 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains simple error-reporting and trace-message routines.
+ * These are suitable for Unix-like systems and others where writing to
+ * stderr is the right thing to do.  Many applications will want to replace
+ * some or all of these routines.
+ *
+ * If you define USE_WINDOWS_MESSAGEBOX in jconfig.h or in the makefile,
+ * you get a Windows-specific hack to display error messages in a dialog box.
+ * It ain't much, but it beats dropping error messages into the bit bucket,
+ * which is what happens to output to stderr under most Windows C compilers.
+ *
+ * These routines are used by both the compression and decompression code.
+ */
+
+#ifdef USE_WINDOWS_MESSAGEBOX
+#include <windows.h>
+#endif
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jversion.h"
+#include "jerror.h"
+
+#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#define EXIT_FAILURE  1
+#endif
+
+
+/*
+ * Create the message string table, indexed by message code.
+ * We do this from the master message list in jerror.h by re-reading
+ * jerror.h with a suitable definition for macro JMESSAGE.
+ * The message table is made an external symbol just in case any applications
+ * want to refer to it directly.
+ */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_std_message_table	jMsgTable
+#endif
+
+#define JMESSAGE(code,string)	string ,	/* keep only each entry's text */
+
+const char * const jpeg_std_message_table[] = {
+#include "jerror.h"
+  NULL				/* extra entry following the last message */
+};
+
+
+/*
+ * Error exit handler: must not return to caller.
+ *
+ * Applications may override this if they want to get control back after
+ * an error.  Typically one would longjmp somewhere instead of exiting.
+ * The setjmp buffer can be made a private field within an expanded error
+ * handler object.  Note that the info needed to generate an error message
+ * is stored in the error object, so you can generate the message now or
+ * later, at your convenience.
+ * You should make sure that the JPEG object is cleaned up (with jpeg_abort
+ * or jpeg_destroy) at some point.
+ */
+
+METHODDEF(noreturn_t)
+error_exit (j_common_ptr cinfo)
+{
+  struct jpeg_error_mgr * errmgr = cinfo->err;
+
+  /* Report first: the message is displayed unconditionally */
+  (*errmgr->output_message) (cinfo);
+  /* Destroy the JPEG object so the memory manager can delete temp files */
+  jpeg_destroy(cinfo);
+  exit(EXIT_FAILURE);		/* does not return to the caller */
+}
+
+
+/*
+ * Actual output of an error or trace message.
+ * Applications may override this method to send JPEG messages somewhere
+ * other than stderr.
+ *
+ * On Windows, printing to stderr is generally completely useless,
+ * so we provide optional code to produce an error-dialog popup.
+ * Most Windows applications will still prefer to override this routine,
+ * but if they don't, it'll do something at least marginally useful.
+ *
+ * NOTE: to use the library in an environment that doesn't support the
+ * C stdio library, you may have to delete the call to fprintf() entirely,
+ * not just not use this routine.
+ */
+
+METHODDEF(void)
+output_message (j_common_ptr cinfo)
+{
+  char text[JMSG_LENGTH_MAX];	/* receives the formatted message */
+
+  /* Let the (possibly overridden) formatter build the message text */
+  (*cinfo->err->format_message) (cinfo, text);
+
+#ifndef USE_WINDOWS_MESSAGEBOX
+  /* Default sink: stderr, with a newline appended */
+  fprintf(stderr, "%s\n", text);
+#else
+  /* Windows option: show the text in a message dialog box */
+  MessageBox(GetActiveWindow(), text, "JPEG Library Error",
+	     MB_OK | MB_ICONERROR);
+#endif
+}
+
+
+/*
+ * Decide whether to emit a trace or warning message.
+ * msg_level is one of:
+ *   -1: recoverable corrupt-data warning, may want to abort.
+ *    0: important advisory messages (always display to user).
+ *    1: first level of tracing detail.
+ *    2,3,...: successively more detailed tracing messages.
+ * An application might override this method if it wanted to abort on warnings
+ * or change the policy about which messages to display.
+ */
+
+METHODDEF(void)
+emit_message (j_common_ptr cinfo, int msg_level)
+{
+  struct jpeg_error_mgr * errmgr = cinfo->err;
+
+  if (msg_level >= 0) {
+    /* Trace message: shown only when tracing is at least this detailed */
+    if (msg_level <= errmgr->trace_level)
+      (*errmgr->output_message) (cinfo);
+    return;
+  }
+
+  /* Warning (msg_level < 0).  A corrupt file may generate many of these,
+   * so only the first one is shown unless trace_level >= 3.
+   */
+  if (errmgr->trace_level >= 3 || errmgr->num_warnings == 0)
+    (*errmgr->output_message) (cinfo);
+  /* Every warning is counted, whether or not it was displayed */
+  errmgr->num_warnings++;
+}
+
+
+/*
+ * Format the most recent JPEG error or message into buffer, which should hold
+ * at least JMSG_LENGTH_MAX characters.  No '\n' character is added.
+ * A string parameter is NUL-terminated first, since the strncpy that filled
+ * msg_parm.s may not have.  Few applications should need to override this.
+ */
+
+METHODDEF(void)
+format_message (j_common_ptr cinfo, char * buffer)
+{
+  struct jpeg_error_mgr * err = cinfo->err;
+  int msg_code = err->msg_code;
+  const char * msgtext = NULL;
+  const char * msgptr;
+  char ch;
+  boolean isstring = FALSE;
+
+  /* Look up message string in proper table */
+  if (msg_code > 0 && msg_code <= err->last_jpeg_message) {
+    msgtext = err->jpeg_message_table[msg_code];
+  } else if (err->addon_message_table != NULL &&
+	     msg_code >= err->first_addon_message &&
+	     msg_code <= err->last_addon_message) {
+    msgtext = err->addon_message_table[msg_code - err->first_addon_message];
+  }
+
+  /* Defend against bogus message number */
+  if (msgtext == NULL) {
+    err->msg_parm.i[0] = msg_code;
+    msgtext = err->jpeg_message_table[0];
+  }
+
+  /* Check for string parameter, as indicated by %s in the message text */
+  msgptr = msgtext;
+  while ((ch = *msgptr++) != '\0') {
+    if (ch == '%') {
+      if (*msgptr == 's') isstring = TRUE;
+      break;
+    }
+  }
+
+  /* Format the message into the passed buffer */
+  if (isstring) {
+    err->msg_parm.s[JMSG_STR_PARM_MAX - 1] = '\0';  /* strncpy may not terminate */
+    sprintf(buffer, msgtext, err->msg_parm.s);
+  } else {
+    sprintf(buffer, msgtext, err->msg_parm.i[0], err->msg_parm.i[1],
+	    err->msg_parm.i[2], err->msg_parm.i[3],
+	    err->msg_parm.i[4], err->msg_parm.i[5],
+	    err->msg_parm.i[6], err->msg_parm.i[7]);
+  }
+}
+
+
+/*
+ * Reset error state variables at start of a new image.
+ * This is called during compression startup to reset trace/error
+ * processing to default state, without losing any application-specific
+ * method pointers.  An application might possibly want to override
+ * this method if it has additional error processing state.
+ */
+
+METHODDEF(void)
+reset_error_mgr (j_common_ptr cinfo)
+{
+  struct jpeg_error_mgr * errmgr = cinfo->err;
+  errmgr->msg_code = 0;		/* may be useful as a flag for "no error" */
+  errmgr->num_warnings = 0;	/* trace_level is application-supplied: kept */
+}
+
+
+/*
+ * Fill in the standard error-handling methods in a jpeg_error_mgr object.
+ * Typical call is:
+ *	struct jpeg_compress_struct cinfo;
+ *	struct jpeg_error_mgr err;
+ *
+ *	cinfo.err = jpeg_std_error(&err);
+ * after which the application may override some of the methods.
+ */
+
+GLOBAL(struct jpeg_error_mgr *)
+jpeg_std_error (struct jpeg_error_mgr * err)
+{
+  /* Clean state: msg_code 0 flags "no error", no warnings, tracing off */
+  err->msg_code = 0;
+  err->num_warnings = 0;
+  err->trace_level = 0;
+
+  /* Standard message table; last valid code is just below JMSG_LASTMSGCODE */
+  err->jpeg_message_table = jpeg_std_message_table;
+  err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1;
+  /* No add-on table; its code range is zeroed for safety */
+  err->addon_message_table = NULL;
+  err->last_addon_message = 0;
+  err->first_addon_message = 0;
+  /* Install the standard methods; the application may override any of them */
+  err->reset_error_mgr = reset_error_mgr;
+  err->format_message = format_message;
+  err->output_message = output_message;
+  err->emit_message = emit_message;
+  err->error_exit = error_exit;
+  return err;
+}
diff --git a/libraries/jpeg/jerror.h b/libraries/jpeg/jerror.h
new file mode 100644
index 000000000..a4b661f71
--- /dev/null
+++ b/libraries/jpeg/jerror.h
@@ -0,0 +1,304 @@
+/*
+ * jerror.h
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 1997-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file defines the error and message codes for the JPEG library.
+ * Edit this file to add new codes, or to translate the message strings to
+ * some other language.
+ * A set of error-reporting macros is defined too.  Some applications using
+ * the JPEG library may wish to include this file to get the error codes
+ * and/or the macros.
+ */
+
+/*
+ * To define the enum list of message codes, include this file without
+ * defining macro JMESSAGE.  To create a message string table, include it
+ * again with a suitable JMESSAGE definition (see jerror.c for an example).
+ */
+#ifndef JMESSAGE
+#ifndef JERROR_H
+/* First time through (JERROR_H not yet defined): define the enum list */
+#define JMAKE_ENUM_LIST
+#else
+/* Repeated inclusions expand every entry to nothing unless JMESSAGE is defined */
+#define JMESSAGE(code,string)
+#endif /* JERROR_H */
+#endif /* JMESSAGE */
+
+#ifdef JMAKE_ENUM_LIST
+
+typedef enum {
+
+#define JMESSAGE(code,string)	code ,	/* each entry becomes an enumerator */
+
+#endif /* JMAKE_ENUM_LIST */
+
+JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */
+
+/* For maintenance convenience, list is alphabetical by message code name. */
+JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
+JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
+JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
+JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
+JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
+JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
+JMESSAGE(JERR_BAD_DCTSIZE, "DCT scaled block size %dx%d not supported")
+JMESSAGE(JERR_BAD_DROP_SAMPLING,
+	 "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
+JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
+JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
+JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
+JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length")
+JMESSAGE(JERR_BAD_LIB_VERSION,
+	 "Wrong JPEG library version: library is %d, caller expects %d")
+JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
+JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
+JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
+JMESSAGE(JERR_BAD_PROGRESSION,
+	 "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
+JMESSAGE(JERR_BAD_PROG_SCRIPT,
+	 "Invalid progressive parameters at scan script entry %d")
+JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
+JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
+JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
+JMESSAGE(JERR_BAD_STRUCT_SIZE,
+	 "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
+JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access")
+JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small")
+JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here")
+JMESSAGE(JERR_CCIR601_NOTIMPL, "CCIR601 sampling not implemented yet")
+JMESSAGE(JERR_COMPONENT_COUNT, "Too many color components: %d, max %d")
+JMESSAGE(JERR_CONVERSION_NOTIMPL, "Unsupported color conversion request")
+JMESSAGE(JERR_DAC_INDEX, "Bogus DAC index %d")
+JMESSAGE(JERR_DAC_VALUE, "Bogus DAC value 0x%x")
+JMESSAGE(JERR_DHT_INDEX, "Bogus DHT index %d")
+JMESSAGE(JERR_DQT_INDEX, "Bogus DQT index %d")
+JMESSAGE(JERR_EMPTY_IMAGE, "Empty JPEG image (DNL not supported)")
+JMESSAGE(JERR_EMS_READ, "Read from EMS failed")
+JMESSAGE(JERR_EMS_WRITE, "Write to EMS failed")
+JMESSAGE(JERR_EOI_EXPECTED, "Didn't expect more than one scan")
+JMESSAGE(JERR_FILE_READ, "Input file read error")
+JMESSAGE(JERR_FILE_WRITE, "Output file write error --- out of disk space?")
+JMESSAGE(JERR_FRACT_SAMPLE_NOTIMPL, "Fractional sampling not implemented yet")
+JMESSAGE(JERR_HUFF_CLEN_OVERFLOW, "Huffman code size table overflow")
+JMESSAGE(JERR_HUFF_MISSING_CODE, "Missing Huffman code table entry")
+JMESSAGE(JERR_IMAGE_TOO_BIG, "Maximum supported image dimension is %u pixels")
+JMESSAGE(JERR_INPUT_EMPTY, "Empty input file")
+JMESSAGE(JERR_INPUT_EOF, "Premature end of input file")
+JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
+	 "Cannot transcode due to multiple use of quantization table %d")
+JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
+JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
+JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
+JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
+JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
+JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
+JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
+JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
+JMESSAGE(JERR_NO_QUANT_TABLE, "Quantization table 0x%02x was not defined")
+JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x")
+JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)")
+JMESSAGE(JERR_QUANT_COMPONENTS,
+	 "Cannot quantize more than %d color components")
+JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors")
+JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors")
+JMESSAGE(JERR_SOF_BEFORE, "Invalid JPEG file structure: %s before SOF")
+JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers")
+JMESSAGE(JERR_SOF_NO_SOS, "Invalid JPEG file structure: missing SOS marker")
+JMESSAGE(JERR_SOF_UNSUPPORTED, "Unsupported JPEG process: SOF type 0x%02x")
+JMESSAGE(JERR_SOI_DUPLICATE, "Invalid JPEG file structure: two SOI markers")
+JMESSAGE(JERR_TFILE_CREATE, "Failed to create temporary file %s")
+JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file")
+JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file")
+JMESSAGE(JERR_TFILE_WRITE,
+	 "Write failed on temporary file --- out of disk space?")
+JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines")
+JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x")
+JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up")
+JMESSAGE(JERR_WIDTH_OVERFLOW, "Image too wide for this implementation")
+JMESSAGE(JERR_XMS_READ, "Read from XMS failed")
+JMESSAGE(JERR_XMS_WRITE, "Write to XMS failed")
+JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT)
+JMESSAGE(JMSG_VERSION, JVERSION)
+JMESSAGE(JTRC_16BIT_TABLES,
+	 "Caution: quantization tables are too coarse for baseline JPEG")
+JMESSAGE(JTRC_ADOBE,
+	 "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
+JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u")
+JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u")
+JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x")
+JMESSAGE(JTRC_DHT, "Define Huffman Table 0x%02x")
+JMESSAGE(JTRC_DQT, "Define Quantization Table %d  precision %d")
+JMESSAGE(JTRC_DRI, "Define Restart Interval %u")
+JMESSAGE(JTRC_EMS_CLOSE, "Freed EMS handle %u")
+JMESSAGE(JTRC_EMS_OPEN, "Obtained EMS handle %u")
+JMESSAGE(JTRC_EOI, "End Of Image")
+JMESSAGE(JTRC_HUFFBITS, "        %3d %3d %3d %3d %3d %3d %3d %3d")
+JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d  %d")
+JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE,
+	 "Warning: thumbnail image size does not match data length %u")
+JMESSAGE(JTRC_JFIF_EXTENSION,
+	 "JFIF extension marker: type 0x%02x, length %u")
+JMESSAGE(JTRC_JFIF_THUMBNAIL, "    with %d x %d thumbnail image")
+JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u")
+JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x")
+JMESSAGE(JTRC_QUANTVALS, "        %4u %4u %4u %4u %4u %4u %4u %4u")
+JMESSAGE(JTRC_QUANT_3_NCOLORS, "Quantizing to %d = %d*%d*%d colors")
+JMESSAGE(JTRC_QUANT_NCOLORS, "Quantizing to %d colors")
+JMESSAGE(JTRC_QUANT_SELECTED, "Selected %d colors for quantization")
+JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d")
+JMESSAGE(JTRC_RST, "RST%d")
+JMESSAGE(JTRC_SMOOTH_NOTIMPL,
+	 "Smoothing not supported with nonstandard sampling ratios")
+JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d")
+JMESSAGE(JTRC_SOF_COMPONENT, "    Component %d: %dhx%dv q=%d")
+JMESSAGE(JTRC_SOI, "Start of Image")
+JMESSAGE(JTRC_SOS, "Start Of Scan: %d components")
+JMESSAGE(JTRC_SOS_COMPONENT, "    Component %d: dc=%d ac=%d")
+JMESSAGE(JTRC_SOS_PARAMS, "  Ss=%d, Se=%d, Ah=%d, Al=%d")
+JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s")
+JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s")
+JMESSAGE(JTRC_THUMB_JPEG,
+	 "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_PALETTE,
+	 "JFIF extension marker: palette thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_RGB,
+	 "JFIF extension marker: RGB thumbnail image, length %u")
+JMESSAGE(JTRC_UNKNOWN_IDS,
+	 "Unrecognized component IDs %d %d %d, assuming YCbCr")
+JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
+JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
+JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
+JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
+JMESSAGE(JWRN_BOGUS_PROGRESSION,
+	 "Inconsistent progression sequence for component %d coefficient %d")
+JMESSAGE(JWRN_EXTRANEOUS_DATA,
+	 "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
+JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment")
+JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code")
+JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d")
+JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file")
+JMESSAGE(JWRN_MUST_RESYNC,
+	 "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
+JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
+JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
+
+#ifdef JMAKE_ENUM_LIST
+
+  JMSG_LASTMSGCODE		/* one past the last real message code */
+} J_MESSAGE_CODE;
+
+#undef JMAKE_ENUM_LIST
+#endif /* JMAKE_ENUM_LIST */
+
+/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
+#undef JMESSAGE
+
+
+#ifndef JERROR_H
+#define JERROR_H
+
+/* Macros to simplify using the error and trace message stuff */
+/* The first parameter is either type of cinfo pointer; it is cast to j_common_ptr */
+
+/* Fatal errors: store code and int parameters, then call err->error_exit */
+#define ERREXIT(cinfo,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT1(cinfo,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT2(cinfo,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT3(cinfo,code,p1,p2,p3)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT4(cinfo,code,p1,p2,p3,p4)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (cinfo)->err->msg_parm.i[3] = (p4), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT6(cinfo,code,p1,p2,p3,p4,p5,p6)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (cinfo)->err->msg_parm.i[3] = (p4), \
+   (cinfo)->err->msg_parm.i[4] = (p5), \
+   (cinfo)->err->msg_parm.i[5] = (p6), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXITS(cinfo,code,str)  ((cinfo)->err->msg_code = (code), \
+   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (cinfo)->err->msg_parm.s[JMSG_STR_PARM_MAX - 1] = '\0', /* strncpy may not terminate */ \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+
+#define MAKESTMT(stuff)		do { stuff } while (0)	/* acts as one statement */
+
+/* Nonfatal errors, emitted at level -1 (we can keep going, but the data is probably corrupt) */
+#define WARNMS(cinfo,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+#define WARNMS1(cinfo,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+#define WARNMS2(cinfo,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+
+/* Informational/debugging messages; lvl is handed to err->emit_message */
+#define TRACEMS(cinfo,lvl,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS1(cinfo,lvl,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS2(cinfo,lvl,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS3(cinfo,lvl,code,p1,p2,p3)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   _mp[4] = (p5); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMSS(cinfo,lvl,code,str)  ((cinfo)->err->msg_code = (code), \
+   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (cinfo)->err->msg_parm.s[JMSG_STR_PARM_MAX - 1] = '\0', /* strncpy may not terminate */ \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+
+#endif /* JERROR_H */
diff --git a/libraries/jpeg/jidctflt.c b/libraries/jpeg/jidctflt.c
new file mode 100644
index 000000000..e33a2b5e4
--- /dev/null
+++ b/libraries/jpeg/jidctflt.c
@@ -0,0 +1,238 @@
+/*
+ * jidctflt.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Modified 2010-2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a floating-point implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * This implementation should be more accurate than either of the integer
+ * IDCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
+#endif
+
+
+/* Dequantize a coefficient: cast it to FAST_FLOAT and multiply it by the
+ * multiplier-table entry, yielding a float result.
+ */
+
+#define DEQUANTIZE(coef,quantval)  (((FAST_FLOAT) (coef)) * (quantval))
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ *
+ * cK represents cos(K*pi/16).
+ */
+
+GLOBAL(void)
+jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  /* Dequantizes the coefficients of coef_block with compptr->dct_table,
+   * runs a 1-D IDCT down each column into a float workspace, then runs a
+   * 1-D IDCT along each workspace row and stores the range-limited
+   * samples into output_buf[0..DCTSIZE-1], starting at output_col.
+   */
+
+  FAST_FLOAT ev0, ev1, ev2, ev3;	/* even-part terms */
+  FAST_FLOAT od4, od5, od6, od7;	/* odd-part terms */
+  FAST_FLOAT p10, p11, p12, p13;	/* butterfly intermediates */
+  FAST_FLOAT s17, d17;			/* sum/difference of odd terms 1 and 7 */
+  FAST_FLOAT s53, d53;			/* sum/difference of odd terms 5 and 3 */
+  FAST_FLOAT rot;			/* product shared by p10 and p12 */
+  FAST_FLOAT biased_dc;			/* pass-2 DC term plus RANGE_CENTER + 0.5 */
+  FAST_FLOAT workspace[DCTSIZE2];	/* buffers data between passes */
+
+  FAST_FLOAT * ws = workspace;
+  JCOEFPTR coef = coef_block;
+  FLOAT_MULT_TYPE * qtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  JSAMPROW out;
+  int i;			/* column counter in pass 1, row index in pass 2 */
+  int k;			/* index for the constant-fill loop */
+
+  /* Pass 1: process columns from input, store into work array.
+   * The loop header advances all three pointers to the next column,
+   * so the early "continue" below needs no pointer bookkeeping.
+   */
+
+  for (i = 0; i < DCTSIZE; i++, coef++, qtbl++, ws++) {
+    /* Quantization usually leaves many coefficients at zero, the AC
+     * terms in particular.  When every AC term of a column is zero,
+     * each output of that column equals the dequantized DC term, so
+     * the full calculation is skipped.  OR-ing the terms together
+     * tests all seven of them for zero at once.
+     */
+    if ((coef[DCTSIZE*1] | coef[DCTSIZE*2] | coef[DCTSIZE*3] |
+	 coef[DCTSIZE*4] | coef[DCTSIZE*5] | coef[DCTSIZE*6] |
+	 coef[DCTSIZE*7]) == 0) {
+      /* AC terms all zero */
+      FAST_FLOAT dc = DEQUANTIZE(coef[DCTSIZE*0], qtbl[DCTSIZE*0]);
+
+      for (k = 0; k < DCTSIZE; k++)
+	ws[DCTSIZE*k] = dc;
+
+      continue;
+    }
+
+    /* Even part: rows 0, 2, 4, 6 of the column */
+
+    ev0 = DEQUANTIZE(coef[DCTSIZE*0], qtbl[DCTSIZE*0]);
+    ev1 = DEQUANTIZE(coef[DCTSIZE*2], qtbl[DCTSIZE*2]);
+    ev2 = DEQUANTIZE(coef[DCTSIZE*4], qtbl[DCTSIZE*4]);
+    ev3 = DEQUANTIZE(coef[DCTSIZE*6], qtbl[DCTSIZE*6]);
+
+    p10 = ev0 + ev2;		/* phase 3 */
+    p11 = ev0 - ev2;
+
+    p13 = ev1 + ev3;		/* phases 5-3 */
+    p12 = (ev1 - ev3) * ((FAST_FLOAT) 1.414213562) - p13; /* 2*c4 */
+
+    ev0 = p10 + p13;		/* phase 2 */
+    ev3 = p10 - p13;
+    ev1 = p11 + p12;
+    ev2 = p11 - p12;
+
+    /* Odd part: rows 1, 3, 5, 7 of the column */
+
+    od4 = DEQUANTIZE(coef[DCTSIZE*1], qtbl[DCTSIZE*1]);
+    od5 = DEQUANTIZE(coef[DCTSIZE*3], qtbl[DCTSIZE*3]);
+    od6 = DEQUANTIZE(coef[DCTSIZE*5], qtbl[DCTSIZE*5]);
+    od7 = DEQUANTIZE(coef[DCTSIZE*7], qtbl[DCTSIZE*7]);
+
+    s53 = od6 + od5;		/* phase 6 */
+    d53 = od6 - od5;
+    s17 = od4 + od7;
+    d17 = od4 - od7;
+
+    od7 = s17 + s53;		/* phase 5 */
+    p11 = (s17 - s53) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
+
+    rot = (d53 + d17) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    p10 = rot - d17 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
+    p12 = rot - d53 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
+
+    od6 = p12 - od7;		/* phase 2 */
+    od5 = p11 - od6;
+    od4 = p10 - od5;
+
+    /* Merge the even and odd halves into the eight column outputs. */
+
+    ws[DCTSIZE*0] = ev0 + od7;
+    ws[DCTSIZE*1] = ev1 + od6;
+    ws[DCTSIZE*2] = ev2 + od5;
+    ws[DCTSIZE*3] = ev3 + od4;
+    ws[DCTSIZE*4] = ev3 - od4;
+    ws[DCTSIZE*5] = ev2 - od5;
+    ws[DCTSIZE*6] = ev1 - od6;
+    ws[DCTSIZE*7] = ev0 - od7;
+  }
+
+  /* Pass 2: process rows from work array, store into output array.
+   * The loop header moves ws on to the next workspace row.
+   */
+
+  ws = workspace;
+  for (i = 0; i < DCTSIZE; i++, ws += DCTSIZE) {
+    out = output_buf[i] + output_col;
+
+    /* Zero rows get no special case here: after the column pass they
+     * are uncommon (typically 5% to 10% of the time), and testing
+     * floats for zero is relatively expensive.
+     */
+
+    /* Even part.  The DC term picks up RANGE_CENTER plus 0.5 first, in
+     * preparation for the range-limit and float->int conversion below.
+     */
+
+    biased_dc = ws[0] + (((FAST_FLOAT) RANGE_CENTER) + ((FAST_FLOAT) 0.5));
+    p10 = biased_dc + ws[4];
+    p11 = biased_dc - ws[4];
+
+    p13 = ws[2] + ws[6];
+    p12 = (ws[2] - ws[6]) *
+	    ((FAST_FLOAT) 1.414213562) - p13; /* 2*c4 */
+
+    ev0 = p10 + p13;
+    ev3 = p10 - p13;
+    ev1 = p11 + p12;
+    ev2 = p11 - p12;
+
+    /* Odd part */
+
+    s53 = ws[5] + ws[3];
+    d53 = ws[5] - ws[3];
+    s17 = ws[1] + ws[7];
+    d17 = ws[1] - ws[7];
+
+    od7 = s17 + s53;		/* phase 5 */
+    p11 = (s17 - s53) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
+
+    rot = (d53 + d17) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    p10 = rot - d17 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
+    p12 = rot - d53 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
+
+    od6 = p12 - od7;		/* phase 2 */
+    od5 = p11 - od6;
+    od4 = p10 - od5;
+
+    /* Final output stage: convert each sum to int, mask it with
+     * RANGE_MASK and look the sample up in the range-limit table.
+     */
+
+    out[0] = range_limit[(int) (ev0 + od7) & RANGE_MASK];
+    out[7] = range_limit[(int) (ev0 - od7) & RANGE_MASK];
+    out[1] = range_limit[(int) (ev1 + od6) & RANGE_MASK];
+    out[6] = range_limit[(int) (ev1 - od6) & RANGE_MASK];
+    out[2] = range_limit[(int) (ev2 + od5) & RANGE_MASK];
+    out[5] = range_limit[(int) (ev2 - od5) & RANGE_MASK];
+    out[3] = range_limit[(int) (ev3 + od4) & RANGE_MASK];
+    out[4] = range_limit[(int) (ev3 - od4) & RANGE_MASK];
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/libraries/jpeg/jidctfst.c b/libraries/jpeg/jidctfst.c
new file mode 100644
index 000000000..1ac3e39cb
--- /dev/null
+++ b/libraries/jpeg/jidctfst.c
@@ -0,0 +1,351 @@
+/*
+ * jidctfst.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Modified 2015-2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_IFAST_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jidctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * The dequantized coefficients are not integers because the AA&N scaling
+ * factors have been incorporated.  We represent them scaled up by PASS1_BITS,
+ * so that the first and second IDCT rounds have the same input scaling.
+ * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
+ * avoid a descaling shift; this compromises accuracy rather drastically
+ * for small quantization table entries, but it saves a lot of shifts.
+ * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
+ * so we use a much larger scaling factor to preserve accuracy.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  8		/* fractional bits in the FIX_* multipliers */
+#define PASS1_BITS  2		/* values stay scaled up by 2**PASS1_BITS until pass 2 */
+#else
+#define CONST_BITS  8		/* fractional bits in the FIX_* multipliers */
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8		/* each value is the constant times 2^8, rounded */
+#define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */
+#define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */
+#define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */
+#define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */
+#else				/* no precomputed table: use FIX() directly */
+#define FIX_1_082392200  FIX(1.082392200)
+#define FIX_1_414213562  FIX(1.414213562)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_2_613125930  FIX(2.613125930)
+#endif
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING	/* replace DESCALE with a plain shift */
+#undef DESCALE
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/* Multiply a DCTELEM variable by an INT32 constant, and immediately
+ * descale by CONST_BITS to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16
+ * multiplication will do.  For 12-bit data, the multiplier table is
+ * declared INT32, so a 32-bit multiply will be used.
+ */
+
+#if BITS_IN_JSAMPLE == 8	/* product is used as is */
+#define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))
+#else				/* product is descaled by IFAST_SCALE_BITS-PASS1_BITS */
+#define DEQUANTIZE(coef,quantval)  \
+	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
+#endif
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ *
+ * cK represents cos(K*pi/16).
+ */
+
+GLOBAL(void)
+jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  /* Dequantizes the coefficients of coef_block with compptr->dct_table,
+   * runs a 1-D IDCT down each column into an int workspace, then runs a
+   * 1-D IDCT along each workspace row.  Each row result is shifted down
+   * by PASS1_BITS+3, masked with RANGE_MASK and mapped through the
+   * range-limit table before it is stored into output_buf[0..DCTSIZE-1],
+   * starting at output_col.
+   */
+
+  DCTELEM ev0, ev1, ev2, ev3;	/* even-part terms */
+  DCTELEM od4, od5, od6, od7;	/* odd-part terms */
+  DCTELEM p10, p11, p12, p13;	/* butterfly intermediates */
+  DCTELEM s17, d17;		/* sum/difference of odd terms 1 and 7 */
+  DCTELEM s53, d53;		/* sum/difference of odd terms 5 and 3 */
+  DCTELEM rot;			/* product shared by p10 and p12 */
+  DCTELEM biased_dc;		/* pass-2 DC term plus range center and fudge */
+  int workspace[DCTSIZE2];	/* buffers data between passes */
+
+  int * ws = workspace;
+  JCOEFPTR coef = coef_block;
+  IFAST_MULT_TYPE * qtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  JSAMPROW out;
+  int i;			/* column counter in pass 1, row index in pass 2 */
+  int k;			/* index for the constant-fill loops */
+  SHIFT_TEMPS			/* for DESCALE */
+  ISHIFT_TEMPS			/* for IRIGHT_SHIFT */
+
+  /* Pass 1: process columns from input, store into work array.
+   * The loop header advances all three pointers to the next column,
+   * so the early "continue" below needs no pointer bookkeeping.
+   * Each MULTIPLY descales its product immediately.
+   */
+
+  for (i = 0; i < DCTSIZE; i++, coef++, qtbl++, ws++) {
+    /* Quantization usually leaves many coefficients at zero, the AC
+     * terms in particular.  When every AC term of a column is zero,
+     * each output of that column equals the dequantized DC term, so
+     * the full calculation is skipped.  OR-ing the terms together
+     * tests all seven of them for zero at once.
+     * With typical images and quantization tables, half or more of
+     * the columns can be handled this way.
+     */
+    if ((coef[DCTSIZE*1] | coef[DCTSIZE*2] | coef[DCTSIZE*3] |
+	 coef[DCTSIZE*4] | coef[DCTSIZE*5] | coef[DCTSIZE*6] |
+	 coef[DCTSIZE*7]) == 0) {
+      /* AC terms all zero */
+      int dc = (int) DEQUANTIZE(coef[DCTSIZE*0], qtbl[DCTSIZE*0]);
+
+      for (k = 0; k < DCTSIZE; k++)
+	ws[DCTSIZE*k] = dc;
+
+      continue;
+    }
+
+    /* Even part: rows 0, 2, 4, 6 of the column */
+
+    ev0 = DEQUANTIZE(coef[DCTSIZE*0], qtbl[DCTSIZE*0]);
+    ev1 = DEQUANTIZE(coef[DCTSIZE*2], qtbl[DCTSIZE*2]);
+    ev2 = DEQUANTIZE(coef[DCTSIZE*4], qtbl[DCTSIZE*4]);
+    ev3 = DEQUANTIZE(coef[DCTSIZE*6], qtbl[DCTSIZE*6]);
+
+    p10 = ev0 + ev2;		/* phase 3 */
+    p11 = ev0 - ev2;
+
+    p13 = ev1 + ev3;		/* phases 5-3 */
+    p12 = MULTIPLY(ev1 - ev3, FIX_1_414213562) - p13; /* 2*c4 */
+
+    ev0 = p10 + p13;		/* phase 2 */
+    ev3 = p10 - p13;
+    ev1 = p11 + p12;
+    ev2 = p11 - p12;
+
+    /* Odd part: rows 1, 3, 5, 7 of the column */
+
+    od4 = DEQUANTIZE(coef[DCTSIZE*1], qtbl[DCTSIZE*1]);
+    od5 = DEQUANTIZE(coef[DCTSIZE*3], qtbl[DCTSIZE*3]);
+    od6 = DEQUANTIZE(coef[DCTSIZE*5], qtbl[DCTSIZE*5]);
+    od7 = DEQUANTIZE(coef[DCTSIZE*7], qtbl[DCTSIZE*7]);
+
+    s53 = od6 + od5;		/* phase 6 */
+    d53 = od6 - od5;
+    s17 = od4 + od7;
+    d17 = od4 - od7;
+
+    od7 = s17 + s53;		/* phase 5 */
+    p11 = MULTIPLY(s17 - s53, FIX_1_414213562); /* 2*c4 */
+
+    rot = MULTIPLY(d53 + d17, FIX_1_847759065); /* 2*c2 */
+    p10 = rot - MULTIPLY(d17, FIX_1_082392200); /* 2*(c2-c6) */
+    p12 = rot - MULTIPLY(d53, FIX_2_613125930); /* 2*(c2+c6) */
+
+    od6 = p12 - od7;		/* phase 2 */
+    od5 = p11 - od6;
+    od4 = p10 - od5;
+
+    /* Merge the even and odd halves into the eight column outputs;
+     * the (int) casts convert the DCTELEM sums for the int workspace.
+     */
+
+    ws[DCTSIZE*0] = (int) (ev0 + od7);
+    ws[DCTSIZE*1] = (int) (ev1 + od6);
+    ws[DCTSIZE*2] = (int) (ev2 + od5);
+    ws[DCTSIZE*3] = (int) (ev3 + od4);
+    ws[DCTSIZE*4] = (int) (ev3 - od4);
+    ws[DCTSIZE*5] = (int) (ev2 - od5);
+    ws[DCTSIZE*6] = (int) (ev1 - od6);
+    ws[DCTSIZE*7] = (int) (ev0 - od7);
+  }
+
+  /* Pass 2: process rows from work array, store into output array.
+   * The results must be descaled by a factor of 8 == 2**3, and the
+   * PASS1_BITS scaling must be undone as well.
+   * The loop header moves ws on to the next workspace row.
+   */
+
+  ws = workspace;
+  for (i = 0; i < DCTSIZE; i++, ws += DCTSIZE) {
+    out = output_buf[i] + output_col;
+
+    /* Add the range center and a fudge factor of half the final shift's
+     * divisor to the DC term, ahead of the final descale and range-limit.
+     */
+    biased_dc = (DCTELEM) ws[0] +
+	   ((((DCTELEM) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (1 << (PASS1_BITS+2)));
+
+    /* Rows of zeroes can be short-circuited just like columns, though
+     * the column pass has created many nonzero AC terms, so this
+     * applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication the test may cost
+     * more than it saves; defining NO_ZERO_ROW_TEST compiles it out.
+     * OR-ing the seven AC terms tests them all for zero at once.
+     */
+
+#ifndef NO_ZERO_ROW_TEST
+    if ((ws[1] | ws[2] | ws[3] | ws[4] |
+	 ws[5] | ws[6] | ws[7]) == 0) {
+      /* AC terms all zero */
+      JSAMPLE dcval = range_limit[(int) IRIGHT_SHIFT(biased_dc, PASS1_BITS+3)
+				  & RANGE_MASK];
+
+      for (k = 0; k < DCTSIZE; k++)
+	out[k] = dcval;
+
+      continue;
+    }
+#endif
+
+    /* Even part */
+
+    p10 = biased_dc + (DCTELEM) ws[4];
+    p11 = biased_dc - (DCTELEM) ws[4];
+
+    p13 = (DCTELEM) ws[2] + (DCTELEM) ws[6];
+    p12 = MULTIPLY((DCTELEM) ws[2] - (DCTELEM) ws[6],
+		   FIX_1_414213562) - p13; /* 2*c4 */
+
+    ev0 = p10 + p13;
+    ev3 = p10 - p13;
+    ev1 = p11 + p12;
+    ev2 = p11 - p12;
+
+    /* Odd part */
+
+    s53 = (DCTELEM) ws[5] + (DCTELEM) ws[3];
+    d53 = (DCTELEM) ws[5] - (DCTELEM) ws[3];
+    s17 = (DCTELEM) ws[1] + (DCTELEM) ws[7];
+    d17 = (DCTELEM) ws[1] - (DCTELEM) ws[7];
+
+    od7 = s17 + s53;		/* phase 5 */
+    p11 = MULTIPLY(s17 - s53, FIX_1_414213562); /* 2*c4 */
+
+    rot = MULTIPLY(d53 + d17, FIX_1_847759065); /* 2*c2 */
+    p10 = rot - MULTIPLY(d17, FIX_1_082392200); /* 2*(c2-c6) */
+    p12 = rot - MULTIPLY(d53, FIX_2_613125930); /* 2*(c2+c6) */
+
+    od6 = p12 - od7;		/* phase 2 */
+    od5 = p11 - od6;
+    od4 = p10 - od5;
+
+    /* Final output stage: shift each sum down by PASS1_BITS+3 (the
+     * factor of 8 plus the PASS1_BITS scaling), then range-limit.
+     */
+
+    out[0] = range_limit[(int) IRIGHT_SHIFT(ev0 + od7, PASS1_BITS+3)
+			 & RANGE_MASK];
+    out[7] = range_limit[(int) IRIGHT_SHIFT(ev0 - od7, PASS1_BITS+3)
+			 & RANGE_MASK];
+    out[1] = range_limit[(int) IRIGHT_SHIFT(ev1 + od6, PASS1_BITS+3)
+			 & RANGE_MASK];
+    out[6] = range_limit[(int) IRIGHT_SHIFT(ev1 - od6, PASS1_BITS+3)
+			 & RANGE_MASK];
+    out[2] = range_limit[(int) IRIGHT_SHIFT(ev2 + od5, PASS1_BITS+3)
+			 & RANGE_MASK];
+    out[5] = range_limit[(int) IRIGHT_SHIFT(ev2 - od5, PASS1_BITS+3)
+			 & RANGE_MASK];
+    out[3] = range_limit[(int) IRIGHT_SHIFT(ev3 + od4, PASS1_BITS+3)
+			 & RANGE_MASK];
+    out[4] = range_limit[(int) IRIGHT_SHIFT(ev3 - od4, PASS1_BITS+3)
+			 & RANGE_MASK];
+  }
+}
+
+#endif /* DCT_IFAST_SUPPORTED */
diff --git a/libraries/jpeg/jidctint.c b/libraries/jpeg/jidctint.c
new file mode 100644
index 000000000..6437079a3
--- /dev/null
+++ b/libraries/jpeg/jidctint.c
@@ -0,0 +1,5240 @@
+/*
+ * jidctint.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modification developed 2002-2016 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ *
+ * We also provide IDCT routines with various output sample block sizes for
+ * direct resolution reduction or enlargement and for directly resolving the
+ * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
+ * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
+ *
+ * For N<8 we simply take the corresponding low-frequency coefficients of
+ * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
+ * to yield the downscaled outputs.
+ * This can be seen as direct low-pass downsampling from the DCT domain
+ * point of view rather than the usual spatial domain point of view,
+ * yielding significant computational savings and results at least
+ * as good as common bilinear (averaging) spatial downsampling.
+ *
+ * For N>8 we apply a partial NxN IDCT, treating the 8 input coefficients
+ * as the lower frequencies and assuming the higher frequencies are zero.
+ * It turns out that the computational effort is similar to the 8x8 IDCT
+ * regarding the output size.
+ * Furthermore, the scaling and descaling is the same for all IDCT sizes.
+ *
+ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
+ * since there would be too many additional constants to pre-calculate.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_ISLOW_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true IDCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D IDCT,
+ * because the y0 and y4 inputs need not be divided by sqrt(N).
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (To scale up 12-bit sample data further, an
+ * intermediate INT32 array would be needed.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13		/* fractional bits in the FIX_* constants */
+#define PASS1_BITS  2		/* extra scaling of the pass-1 outputs */
+#else
+#define CONST_BITS  13		/* fractional bits in the FIX_* constants */
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13		/* each value is the constant times 2^13, rounded */
+#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#else				/* no precomputed table: use FIX() directly */
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8	/* operands fit in 16 bits */
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else				/* needs a full 32-bit multiply */
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce an int result.  In this module, both inputs and result
+ * are 16 bits or less, so either int or short multiply will work.
+ */
+
+#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/16).
+ */
+
+GLOBAL(void)
+jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE2];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * Note results are scaled up by sqrt(8) compared to a true IDCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 <<= CONST_BITS;
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    tmp0 = z2 + z3;
+    tmp1 = z2 - z3;
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+
+  /* Pass 2: process rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z2 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
+				  & RANGE_MASK];
+
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      outptr[4] = dcval;
+      outptr[5] = dcval;
+      outptr[6] = dcval;
+      outptr[7] = dcval;
+
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
+    z3 = (INT32) wsptr[4];
+
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = (INT32) wsptr[7];
+    tmp1 = (INT32) wsptr[5];
+    tmp2 = (INT32) wsptr[3];
+    tmp3 = (INT32) wsptr[1];
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 7x7 output block.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/14).
+ */
+
+GLOBAL(void)
+jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;		/* top of the current input column */
+  ISLOW_MULT_TYPE * quantptr;	/* dct_table entry matching *inptr */
+  int * wsptr;			/* cursor into workspace[] */
+  JSAMPROW outptr;		/* output row ctr, starting at output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;			/* column index in pass 1, row index in pass 2 */
+  int workspace[7*7];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * Only rows 0..6 of columns 0..6 are read; input row stride is DCTSIZE. */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: rows 0, 2, 4, 6 of this column produce tmp10..tmp13 */
+
+    tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp13 <<= CONST_BITS;
+    /* Add fudge factor (half the descale divisor) here for final descale. */
+    tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
+    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp0 = z1 + z3;
+    z2 -= tmp0;
+    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
+    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
+    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
+
+    /* Odd part: rows 1, 3, 5 of this column produce tmp0..tmp2 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+
+    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
+    tmp1 += tmp2;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
+    tmp0 += z2;
+    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
+
+    /* Final output stage: rows k and 6-k are even+odd and even-odd;
+     * the middle row 3 takes the even term tmp13 alone. */
+    wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 7 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0, 2, 4, 6 of this row */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp13 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
+    tmp13 <<= CONST_BITS;	/* bias is now half of 2**(CONST_BITS+PASS1_BITS+3) */
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
+    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp0 = z1 + z3;
+    z2 -= tmp0;
+    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
+    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
+    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
+
+    /* Odd part: workspace entries 1, 3, 5 of this row */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+
+    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
+    tmp1 += tmp2;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
+    tmp0 += z2;
+    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
+
+    /* Final output stage: descale, mask the index, look up range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 7;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 6x6 output block.
+ *
+ * Optimized algorithm with 3 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/12).
+ */
+
+GLOBAL(void)
+jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;		/* top of the current input column */
+  ISLOW_MULT_TYPE * quantptr;	/* dct_table entry matching *inptr */
+  int * wsptr;			/* cursor into workspace[] */
+  JSAMPROW outptr;		/* output row ctr, starting at output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;			/* column index in pass 1, row index in pass 2 */
+  int workspace[6*6];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * Only rows 0..5 of columns 0..5 are read; input row stride is DCTSIZE. */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: rows 0, 2, 4 of this column produce tmp10..tmp12 */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor (half the descale divisor) here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); /* descaled early */
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part: rows 1, 3, 5 of this column produce tmp0..tmp2 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << PASS1_BITS;	/* same scale as descaled tmp11 */
+
+    /* Final output stage: rows 1 and 4 combine the already-descaled
+     * tmp11/tmp1 and so need no shift; the other rows are descaled here. */
+    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*1] = (int) (tmp11 + tmp1);
+    wsptr[6*4] = (int) (tmp11 - tmp1);
+    wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 6 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0, 2, 4 of this row */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[4];
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = tmp0 - tmp10 - tmp10;	/* unlike pass 1, descaled at output */
+    tmp10 = (INT32) wsptr[2];
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part: workspace entries 1, 3, 5 of this row */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << CONST_BITS;	/* full scale, matching tmp11 */
+
+    /* Final output stage: descale, mask the index, look up range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 6;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 5x5 output block.
+ *
+ * Optimized algorithm with 5 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/10).
+ */
+
+GLOBAL(void)
+jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;		/* top of the current input column */
+  ISLOW_MULT_TYPE * quantptr;	/* dct_table entry matching *inptr */
+  int * wsptr;			/* cursor into workspace[] */
+  JSAMPROW outptr;		/* output row ctr, starting at output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;			/* column index in pass 1, row index in pass 2 */
+  int workspace[5*5];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * Only rows 0..4 of columns 0..4 are read; input row stride is DCTSIZE. */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: rows 0, 2, 4 of this column produce tmp10..tmp12 */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp12 <<= CONST_BITS;
+    /* Add fudge factor (half the descale divisor) here for final descale. */
+    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;		/* middle row: DC term minus 4*z2 */
+
+    /* Odd part: rows 1 and 3 of this column produce tmp0, tmp1 */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
+    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
+    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
+
+    /* Final output stage: rows k and 4-k are even+odd and even-odd;
+     * the middle row 2 takes the even term tmp12 alone. */
+    wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 5 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0, 2, 4 of this row */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp12 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
+    tmp12 <<= CONST_BITS;
+    tmp0 = (INT32) wsptr[2];
+    tmp1 = (INT32) wsptr[4];
+    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;		/* middle sample: DC term minus 4*z2 */
+
+    /* Odd part: workspace entries 1 and 3 of this row */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
+    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
+    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
+
+    /* Final output stage: descale, mask the index, look up range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 5;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 4x4 output block.
+ *
+ * Optimized algorithm with 3 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+ */
+
+GLOBAL(void)
+jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;		/* top of the current input column */
+  ISLOW_MULT_TYPE * quantptr;	/* dct_table entry matching *inptr */
+  int * wsptr;			/* cursor into workspace[] */
+  JSAMPROW outptr;		/* output row ctr, starting at output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;			/* column index in pass 1, row index in pass 2 */
+  int workspace[4*4];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * Only rows 0..3 of columns 0..3 are read; input row stride is DCTSIZE. */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: no multiplies, so scale straight to 2**PASS1_BITS */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    
+    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
+    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
+    /* Add fudge factor here for final descale; z1 feeds both shifts below. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
+		       CONST_BITS-PASS1_BITS);
+    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
+		       CONST_BITS-PASS1_BITS);
+
+    /* Final output stage: both parts are already at 2**PASS1_BITS scale */
+
+    wsptr[4*0] = (int) (tmp10 + tmp0);
+    wsptr[4*3] = (int) (tmp10 - tmp0);
+    wsptr[4*1] = (int) (tmp12 + tmp2);
+    wsptr[4*2] = (int) (tmp12 - tmp2);
+  }
+
+  /* Pass 2: process 4 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0 and 2 of this row */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp2 = (INT32) wsptr[2];
+
+    tmp10 = (tmp0 + tmp2) << CONST_BITS;	/* bring up to the odd part's scale */
+    tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
+    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+    /* Final output stage: descale, mask the index, look up range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 4;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 3x3 output block.
+ *
+ * Optimized algorithm with 2 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/6).
+ */
+
+GLOBAL(void)
+jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  JCOEFPTR inptr;		/* top of the current input column */
+  ISLOW_MULT_TYPE * quantptr;	/* dct_table entry matching *inptr */
+  int * wsptr;			/* cursor into workspace[] */
+  JSAMPROW outptr;		/* output row ctr, starting at output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;			/* column index in pass 1, row index in pass 2 */
+  int workspace[3*3];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * Only rows 0..2 of columns 0..2 are read; input row stride is DCTSIZE. */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: rows 0 and 2; leaves tmp10 (outer rows) and tmp2 (middle) */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor (half the descale divisor) here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part: row 1 only; tmp12 and tmp0 are reused as scratch here */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage: rows 0 and 2 are even+odd and even-odd;
+     * the middle row 1 takes the even term tmp2 alone. */
+    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 3 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0 and 2 of this row */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[2];
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part: workspace entry 1; tmp12 and tmp0 are reused as scratch */
+
+    tmp12 = (INT32) wsptr[1];
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage: descale, mask the index, look up range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 3;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 2x2 output block.
+ *
+ * Multiplication-less algorithm.
+ */
+
+GLOBAL(void)
+jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  /* Only the top-left 2x2 corner of the coefficient block is consumed. */
+  DCTELEM top, bottom;		/* dequantized row-0 / row-1 term of a column */
+  DCTELEM sum0, diff0;		/* column 0 results (bias included) */
+  DCTELEM sum1, diff1;		/* column 1 results */
+  JSAMPROW row;
+  JSAMPLE *limit = IDCT_range_limit(cinfo);
+  ISLOW_MULT_TYPE * qtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  ISHIFT_TEMPS
+
+  /* Vertical pass, column 0.  The range center and the rounding term for
+   * the closing shift by 3 are folded into the row-0 term here, so they
+   * reach every output sample through sum0/diff0.
+   */
+  top = DEQUANTIZE(coef_block[0], qtbl[0]);
+  bottom = DEQUANTIZE(coef_block[DCTSIZE], qtbl[DCTSIZE]);
+  top += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
+
+  sum0 = top + bottom;
+  diff0 = top - bottom;
+
+  /* Vertical pass, column 1 (no bias: it is already carried by column 0). */
+  top = DEQUANTIZE(coef_block[1], qtbl[1]);
+  bottom = DEQUANTIZE(coef_block[DCTSIZE + 1], qtbl[DCTSIZE + 1]);
+  sum1 = top + bottom;
+  diff1 = top - bottom;
+
+  /* Horizontal pass: butterfly the two column results of each row, drop
+   * the 3 scaling bits and look the sample up through the limit table.
+   */
+
+  /* Output row 0 */
+  row = output_buf[0] + output_col;
+  row[0] = limit[(int) IRIGHT_SHIFT(sum0 + sum1, 3) & RANGE_MASK];
+  row[1] = limit[(int) IRIGHT_SHIFT(sum0 - sum1, 3) & RANGE_MASK];
+
+  /* Output row 1 */
+  row = output_buf[1] + output_col;
+  row[0] = limit[(int) IRIGHT_SHIFT(diff0 + diff1, 3) & RANGE_MASK];
+  row[1] = limit[(int) IRIGHT_SHIFT(diff0 - diff1, 3) & RANGE_MASK];
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 1x1 output block.
+ *
+ * We hardly need an inverse DCT routine for this: just take the
+ * average pixel value, which is one-eighth of the DC coefficient.
+ */
+
+GLOBAL(void)
+jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  JSAMPLE *limit = IDCT_range_limit(cinfo);
+  ISLOW_MULT_TYPE * qtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  DCTELEM biased_dc;
+  int index;
+  ISHIFT_TEMPS
+
+  /* The lone output sample is the dequantized DC term over 8.  Before the
+   * shift by 3 we add the range center scaled by 8 (for the range-limit
+   * lookup) and 4, i.e. half of 8, so that the shift rounds.
+   */
+  biased_dc = DEQUANTIZE(coef_block[0], qtbl[0]);
+  biased_dc += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
+  index = (int) IRIGHT_SHIFT(biased_dc, 3) & RANGE_MASK;
+
+  output_buf[0][output_col] = limit[index];
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 9x9 output block.
+ *
+ * Optimized algorithm with 10 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/18).
+ */
+
+GLOBAL(void)
+jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*9];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
+    tmp1 = tmp0 + tmp3;
+    tmp2 = tmp0 - tmp3 - tmp3;
+
+    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+    tmp11 = tmp2 + tmp0;
+    tmp14 = tmp2 - tmp0 - tmp0;
+
+    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
+    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
+
+    tmp10 = tmp1 + tmp0 - tmp3;
+    tmp12 = tmp1 - tmp0 + tmp2;
+    tmp13 = tmp1 - tmp2 + tmp3;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
+
+    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
+    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
+    tmp0 = tmp2 + tmp3 - z2;
+    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
+    tmp2 += z2 - tmp1;
+    tmp3 += z2 + tmp1;
+    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 9 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 9; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
+    tmp1 = tmp0 + tmp3;
+    tmp2 = tmp0 - tmp3 - tmp3;
+
+    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+    tmp11 = tmp2 + tmp0;
+    tmp14 = tmp2 - tmp0 - tmp0;
+
+    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
+    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
+
+    tmp10 = tmp1 + tmp0 - tmp3;
+    tmp12 = tmp1 - tmp0 + tmp2;
+    tmp13 = tmp1 - tmp2 + tmp3;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
+
+    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
+    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
+    tmp0 = tmp2 + tmp3 - z2;
+    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
+    tmp2 += z2 - tmp1;
+    tmp3 += z2 + tmp1;
+    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 10x10 output block: pass 1 maps 8 input columns to 10
+ * workspace rows, pass 2 maps each row to 10 range-limited samples.
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/20).
+ */
+
+GLOBAL(void)
+jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14; /* odd-part results (some reused as even-part scratch) */
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24; /* even-part results */
+  INT32 z1, z2, z3, z4, z5;
+  JCOEFPTR inptr;		/* current input column */
+  ISLOW_MULT_TYPE * quantptr;	/* dct_table entries paired with inptr */
+  int * wsptr;			/* current workspace column (pass 1) / row (pass 2) */
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*10];	/* 10 rows x 8 columns; buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 8 input columns, store 10 values per column into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient rows 0, 4, 2, 6 of this column */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS; /* NOTE(review): shifts a signed value; UB in ISO C if negative -- confirm */
+    /* Add fudge factor here for final descale: half of 2^(CONST_BITS-PASS1_BITS), assuming ONE is 1. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
+			CONST_BITS-PASS1_BITS); /* descaled early; see rows 2 and 7 below */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part: coefficient rows 1, 3, 5, 7 of this column */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+    z5 = z3 << CONST_BITS; /* row 5 is added without a multiply, so it is shifted instead */
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z5 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); /* last term is tmp13 * 2^(CONST_BITS-1) */
+
+    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; /* shifted by PASS1_BITS only; stored below without descale */
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage: descale and store; output row k lands at wsptr[8*k] */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) (tmp22 + tmp12); /* tmp22 was already descaled above */
+    wsptr[8*7] = (int) (tmp22 - tmp12); /* likewise: no RIGHT_SHIFT here */
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 10 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 10; ctr++) {
+    outptr = output_buf[ctr] + output_col; /* output row ctr, starting at output_col */
+
+    /* Even part: workspace columns 0, 4, 2, 6 of this row */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z3 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part: workspace columns 1, 3, 5, 7 of this row */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z3 <<= CONST_BITS; /* unlike pass 1, shifted in place (z5 is not used here) */
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z3 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; /* z3 already carries the CONST_BITS shift */
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage: descale, mask with RANGE_MASK, look up in range_limit[] */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing an 11x11 output block: pass 1 maps 8 input columns to 11
+ * workspace rows, pass 2 maps each row to 11 range-limited samples.
+ * Optimized algorithm with 24 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/22).
+ */
+
+GLOBAL(void)
+jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14; /* odd-part results (tmp10 first holds the scaled DC term) */
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; /* even-part results */
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;		/* current input column */
+  ISLOW_MULT_TYPE * quantptr;	/* dct_table entries paired with inptr */
+  int * wsptr;			/* current workspace column (pass 1) / row (pass 2) */
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*11];	/* 11 rows x 8 columns; buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 8 input columns, store 11 values per column into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient rows 0, 2, 4, 6 of this column */
+
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp10 <<= CONST_BITS; /* NOTE(review): shifts a signed value; UB in ISO C if negative -- confirm */
+    /* Add fudge factor here for final descale: half of 2^(CONST_BITS-PASS1_BITS), assuming ONE is 1. */
+    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
+    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
+    z4 = z1 + z3;
+    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
+    z4 -= z2;
+    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
+    tmp21 = tmp20 + tmp23 + tmp25 -
+	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
+    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+    tmp24 += tmp25;
+    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
+    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
+	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
+    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0; last read of the DC term in tmp10 */
+
+    /* Odd part: coefficient rows 1, 3, 5, 7 of this column (tmp10 is reused from here on) */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z1 + z2;
+    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
+    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
+    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
+    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
+    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
+    tmp11 += z1;
+    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
+    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
+	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
+	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
+
+    /* Final output stage: descale and store; output row k lands at wsptr[8*k] */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS); /* middle row: even part only */
+  }
+
+  /* Pass 2: process 11 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 11; ctr++) {
+    outptr = output_buf[ctr] + output_col; /* output row ctr, starting at output_col */
+
+    /* Even part: workspace columns 0, 2, 4, 6 of this row */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp10 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
+    tmp10 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
+    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
+    z4 = z1 + z3;
+    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
+    z4 -= z2;
+    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
+    tmp21 = tmp20 + tmp23 + tmp25 -
+	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
+    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+    tmp24 += tmp25;
+    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
+    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
+	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
+    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0; last read of the DC term in tmp10 */
+
+    /* Odd part: workspace columns 1, 3, 5, 7 of this row (tmp10 is reused from here on) */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z1 + z2;
+    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
+    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
+    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
+    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
+    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
+    tmp11 += z1;
+    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
+    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
+	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
+	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
+
+    /* Final output stage: descale, mask with RANGE_MASK, look up in range_limit[] */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 12x12 output block: pass 1 maps 8 input columns to 12
+ * workspace rows, pass 2 maps each row to 12 range-limited samples.
+ * Optimized algorithm with 15 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/24).
+ */
+
+GLOBAL(void)
+jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; /* odd-part results (tmp10..tmp12 reused as even-part scratch) */
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; /* even-part results */
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;		/* current input column */
+  ISLOW_MULT_TYPE * quantptr;	/* dct_table entries paired with inptr */
+  int * wsptr;			/* current workspace column (pass 1) / row (pass 2) */
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*12];	/* 12 rows x 8 columns; buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 8 input columns, store 12 values per column into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient rows 0, 4, 2, 6 of this column */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS; /* NOTE(review): shifts a signed value; UB in ISO C if negative -- confirm */
+    /* Add fudge factor here for final descale: half of 2^(CONST_BITS-PASS1_BITS), assuming ONE is 1. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS; /* row 2 is also combined unmultiplied below, so shift it */
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z2 <<= CONST_BITS; /* row 6 is only ever combined unmultiplied */
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part: coefficient rows 1, 3, 5, 7 of this column */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4; /* the tmp11/tmp14 values above are consumed; both are recomputed below */
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage: descale and store; output row k lands at wsptr[8*k] */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 12 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 12; ctr++) {
+    outptr = output_buf[ctr] + output_col; /* output row ctr, starting at output_col */
+
+    /* Even part: workspace columns 0, 4, 2, 6 of this row */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z3 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 <<= CONST_BITS;
+
+    z4 = (INT32) wsptr[4];
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = (INT32) wsptr[2];
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS; /* column 2 is also combined unmultiplied below, so shift it */
+    z2 = (INT32) wsptr[6];
+    z2 <<= CONST_BITS; /* column 6 is only ever combined unmultiplied */
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part: workspace columns 1, 3, 5, 7 of this row */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4; /* the tmp11/tmp14 values above are consumed; both are recomputed below */
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage: descale, mask with RANGE_MASK, look up in range_limit[] */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 13x13 output block.
+ *
+ * Optimized algorithm with 29 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/26).
+ */
+
+GLOBAL(void)
+jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*13];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
+
+    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
+    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
+
+    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
+    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
+
+    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
+    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
+
+    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
+    tmp15 = z1 + z4;
+    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
+    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
+    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
+    tmp11 += tmp14;
+    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
+    tmp12 += tmp14;
+    tmp13 += tmp14;
+    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
+    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
+    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
+    tmp14 += z1;
+    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
+	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 13 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 13; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z1 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z1 <<= CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[4];
+    z4 = (INT32) wsptr[6];
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
+
+    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
+    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
+
+    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
+    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
+
+    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
+    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
+
+    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
+    tmp15 = z1 + z4;
+    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
+    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
+    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
+    tmp11 += tmp14;
+    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
+    tmp12 += tmp14;
+    tmp13 += tmp14;
+    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
+    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
+    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
+    tmp14 += z1;
+    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
+	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 14x14 output block (rows output_buf[0..13] at output_col).
+ * Pass 1 maps 8 input columns to 14 rows; pass 2 maps 8 values to 14 samples.
+ * Optimized algorithm with 20 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/28).
+ */
+
+GLOBAL(void)
+jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; /* scratch, then odd-part sums */
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; /* even-part sums */
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;		/* current input coefficient column */
+  ISLOW_MULT_TYPE * quantptr;	/* dct_table entries for that column */
+  int * wsptr;			/* cursor into workspace */
+  JSAMPROW outptr;		/* current output row, offset by output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;			/* column counter in pass 1, row counter in pass 2 */
+  int workspace[8*14];	/* 14 rows of 8 ints; buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 8 input columns, store 14 rows into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: uses input rows 0, 2, 4, 6 of this column */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    /* tmp23 is descaled right here, unlike the other even-part sums. */
+    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
+			CONST_BITS-PASS1_BITS);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part: uses input rows 1, 3, 5, 7 of this column */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp13 = z4 << CONST_BITS; /* row-7 term, added directly to the products below */
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2; /* z1 is now row1 - row3 */
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
+    tmp16 += tmp15;
+    z1    += z4; /* z1 is now row1 - row3 + row7; z4 is overwritten next */
+    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
+    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
+    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
+    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
+
+    tmp13 = (z1 - z3) << PASS1_BITS; /* (row1 - row3 - row5 + row7) << PASS1_BITS */
+
+    /* Final output stage: rows k and 13-k are even part +/- odd part */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) (tmp23 + tmp13); /* no shift: tmp23 was descaled above */
+    wsptr[8*10] = (int) (tmp23 - tmp13); /* and tmp13 carries only PASS1_BITS */
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 14 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 14; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: uses workspace entries 0, 2, 4, 6 of this row */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z1 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z1 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part: uses workspace entries 1, 3, 5, 7 of this row */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+    z4 <<= CONST_BITS; /* entry-7 term, added directly to the products below */
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2; /* z1 is now ws[1] - ws[3] */
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
+    tmp16 += tmp15;
+    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
+    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
+    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
+    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
+
+    tmp13 = ((z1 - z3) << CONST_BITS) + z4; /* z4 was already shifted by CONST_BITS */
+
+    /* Final output stage: samples k and 13-k are even +/- odd, descaled and range-limited */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 15x15 output block (rows output_buf[0..14] at output_col).
+ * Pass 1 maps 8 input columns to 15 rows; pass 2 maps 8 values to 15 samples.
+ * Optimized algorithm with 22 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/30).
+ */
+
+GLOBAL(void)
+jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; /* scratch, then odd-part sums */
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; /* even-part sums */
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;		/* current input coefficient column */
+  ISLOW_MULT_TYPE * quantptr;	/* dct_table entries for that column */
+  int * wsptr;			/* cursor into workspace */
+  JSAMPROW outptr;		/* current output row, offset by output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;			/* column counter in pass 1, row counter in pass 2 */
+  int workspace[8*15];	/* 15 rows of 8 ints; buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 8 input columns, store 15 rows into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: uses input rows 0, 2, 4, 6 of this column */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+    tmp12 = z1 - tmp10;
+    tmp13 = z1 + tmp11;
+    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
+
+    z4 = z2 - z3; /* row2 - row4; the row-6 value in z4 is no longer needed */
+    z3 += z2;     /* row2 + row4 */
+    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
+
+    tmp20 = tmp13 + tmp10 + tmp11;
+    tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+    tmp25 = tmp13 - tmp10 - tmp11;
+    tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+    tmp21 = tmp12 + tmp10 + tmp11;
+    tmp24 = tmp13 - tmp10 + tmp11;
+    tmp11 += tmp11; /* doubled here; tmp11 is reassigned in the odd part */
+    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
+    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
+
+    /* Odd part: uses input rows 1, 3, 5, 7 of this column */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); /* z4 briefly holds row 5 */
+    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp13 = z2 - z4;
+    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
+    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
+    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
+
+    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
+    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
+    z2 = z1 - z4; /* row1 - row7; the row-3 value in z2 is no longer needed */
+    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
+
+    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
+    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
+    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
+    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
+
+    /* Final output stage: rows k and 14-k are even +/- odd; row 7 is even only */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 15 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 15; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: uses workspace entries 0, 2, 4, 6 of this row */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z1 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z1 <<= CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[4];
+    z4 = (INT32) wsptr[6];
+
+    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+    tmp12 = z1 - tmp10;
+    tmp13 = z1 + tmp11;
+    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
+
+    z4 = z2 - z3; /* ws[2] - ws[4]; the ws[6] value in z4 is no longer needed */
+    z3 += z2;     /* ws[2] + ws[4] */
+    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
+
+    tmp20 = tmp13 + tmp10 + tmp11;
+    tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+    tmp25 = tmp13 - tmp10 - tmp11;
+    tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+    tmp21 = tmp12 + tmp10 + tmp11;
+    tmp24 = tmp13 - tmp10 + tmp11;
+    tmp11 += tmp11; /* doubled here; tmp11 is reassigned in the odd part */
+    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
+    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
+
+    /* Odd part: uses workspace entries 1, 3, 5, 7 of this row */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z4 = (INT32) wsptr[5]; /* z4 briefly holds entry 5 */
+    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
+    z4 = (INT32) wsptr[7];
+
+    tmp13 = z2 - z4;
+    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
+    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
+    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
+
+    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
+    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
+    z2 = z1 - z4; /* ws[1] - ws[7]; the ws[3] value in z2 is no longer needed */
+    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
+
+    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
+    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
+    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
+    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
+
+    /* Final output stage: samples k and 14-k are even +/- odd; sample 7 is even only */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 16x16 output block (rows output_buf[0..15] at output_col).
+ * Pass 1 maps 8 input columns to 16 rows; pass 2 maps 8 values to 16 samples.
+ * Optimized algorithm with 28 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/32).
+ */
+
+GLOBAL(void)
+jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; /* scratch, then odd-part sums */
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; /* even-part sums */
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;		/* current input coefficient column */
+  ISLOW_MULT_TYPE * quantptr;	/* dct_table entries for that column */
+  int * wsptr;			/* cursor into workspace */
+  JSAMPROW outptr;		/* current output row, offset by output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;			/* column counter in pass 1, row counter in pass 2 */
+  int workspace[8*16];	/* 16 rows of 8 ints; buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 8 input columns, store 16 rows into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: uses input rows 0, 2, 4, 6 of this column */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part: uses input rows 1, 3, 5, 7 of this column */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z1 + z3; /* shared by the c5 and c11 products below */
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4; /* z2 is now row3 + row7 */
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage: rows k and 15-k are even part +/- odd part */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 16 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 16; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: uses workspace entries 0, 2, 4, 6 of this row */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[4];
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part: uses workspace entries 1, 3, 5, 7 of this row */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z1 + z3; /* shared by the c5 and c11 products below */
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4; /* z2 is now ws[3] + ws[7] */
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage: samples k and 15-k are even +/- odd, descaled and range-limited */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 16x8 output block: rows output_buf[0..7] each receive 16
+ * samples starting at output_col.  Quantization values: compptr->dct_table.
+ * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;  /* cursor into coef_block, one column per pass-1 step */
+  ISLOW_MULT_TYPE * quantptr;  /* cursor into dct_table, moves with inptr */
+  int * wsptr;  /* cursor into workspace (column in pass 1, row in pass 2) */
+  JSAMPROW outptr;  /* current output row, already offset by output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;  /* column counter in pass 1, row counter in pass 2 */
+  int workspace[8*8];	/* 8 rows of 8 ints; buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * Note results are scaled up by sqrt(8) compared to a true IDCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero: fill this workspace column with the scaled DC */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;  /* skip the full 8-point kernel for this column */
+    }
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 <<= CONST_BITS;
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    tmp0 = z2 + z3;
+    tmp1 = z2 - z3;
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+
+  /* Pass 2: process 8 rows from work array, store into output array.
+   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+   */
+
+  wsptr = workspace;  /* rewind to the first workspace row */
+  for (ctr = 0; ctr < 8; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0, 2, 4, 6 of this row -> tmp20..tmp27 */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[4];
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part: entries 1, 3, 5, 7 of this row -> tmp0..tmp3, tmp10..tmp13 */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z1 + z3;  /* shared by the c5 and c11 products below */
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;  /* z2 now holds z2 + z4, reused by the next two products */
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage: outptr[k] = even + odd, outptr[15-k] = even - odd */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 14x7 output block: rows output_buf[0..6] each receive 14
+ * samples starting at output_col.  Only coefficient rows 0..6 are read.
+ * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;  /* cursor into coef_block, one column per pass-1 step */
+  ISLOW_MULT_TYPE * quantptr;  /* cursor into dct_table, moves with inptr */
+  int * wsptr;  /* cursor into workspace (column in pass 1, row in pass 2) */
+  JSAMPROW outptr;  /* current output row, already offset by output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;  /* column counter in pass 1, row counter in pass 2 */
+  int workspace[8*7];	/* 7 rows of 8 ints; buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient rows 0, 2, 4, 6 -> tmp20..tmp23 */
+
+    tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp23 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
+    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
+    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp10 = z1 + z3;
+    z2 -= tmp10;  /* z2 = z2 - (z1 + z3), consumed by the c0 term below */
+    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
+    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
+    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
+    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
+
+    /* Odd part: coefficient rows 1, 3, 5 -> tmp10..tmp12 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
+    tmp10 = tmp11 - tmp12;
+    tmp11 += tmp12;
+    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
+    tmp11 += tmp12;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
+    tmp10 += z2;
+    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
+
+    /* Final output stage: rows k and 6-k share terms; row 3 is even-only */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 7 rows from work array, store into output array.
+   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
+   */
+
+  wsptr = workspace;  /* rewind to the first workspace row */
+  for (ctr = 0; ctr < 7; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0, 2, 4, 6 of this row -> tmp20..tmp26 */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z1 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z1 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part: workspace entries 1, 3, 5, 7 of this row -> tmp10..tmp16 */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+    z4 <<= CONST_BITS;  /* entry 7 is used only in adds/subtracts below */
+
+    tmp14 = z1 + z3;  /* shared by the c5 and c9 products below */
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;  /* z1 now holds z1 - z2 for the rest of the odd part */
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
+    tmp16 += tmp15;
+    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
+    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
+    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
+    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
+
+    tmp13 = ((z1 - z3) << CONST_BITS) + z4;  /* z1 is already z1 - z2 here */
+
+    /* Final output stage: outptr[k] = even + odd, outptr[13-k] = even - odd */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 12x6 output block: rows output_buf[0..5] each receive 12
+ * samples starting at output_col.  Only coefficient rows 0..5 are read.
+ * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;  /* cursor into coef_block, one column per pass-1 step */
+  ISLOW_MULT_TYPE * quantptr;  /* cursor into dct_table, moves with inptr */
+  int * wsptr;  /* cursor into workspace (column in pass 1, row in pass 2) */
+  JSAMPROW outptr;  /* current output row, already offset by output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;  /* column counter in pass 1, row counter in pass 2 */
+  int workspace[8*6];	/* 6 rows of 8 ints; buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient rows 0, 2, 4 -> tmp20, tmp21, tmp22 */
+
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp10 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
+    tmp11 = tmp10 + tmp20;
+    tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
+    tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
+    tmp20 = tmp11 + tmp10;
+    tmp22 = tmp11 - tmp10;
+
+    /* Odd part: coefficient rows 1, 3, 5 -> tmp10, tmp11, tmp12 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
+    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
+    tmp11 = (z1 - z2 - z3) << PASS1_BITS;  /* pairs with pre-shifted tmp21 */
+
+    /* Final output stage: rows 1 and 4 use tmp21/tmp11, which need no shift */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) (tmp21 + tmp11);
+    wsptr[8*4] = (int) (tmp21 - tmp11);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 6 rows from work array, store into output array.
+   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
+   */
+
+  wsptr = workspace;  /* rewind to the first workspace row */
+  for (ctr = 0; ctr < 6; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0, 2, 4, 6 of this row -> tmp20..tmp25 */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z3 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 <<= CONST_BITS;
+
+    z4 = (INT32) wsptr[4];
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = (INT32) wsptr[2];
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;
+    z2 = (INT32) wsptr[6];
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part: workspace entries 1, 3, 5, 7 of this row -> tmp10..tmp15 */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;  /* z1 = entry 1 - entry 7 */
+    z2 -= z3;  /* z2 = entry 3 - entry 5 */
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage: outptr[k] = even + odd, outptr[11-k] = even - odd */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 10x5 output block: rows output_buf[0..4] each receive 10
+ * samples starting at output_col.  Only coefficient rows 0..4 are read.
+ * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;  /* cursor into coef_block, one column per pass-1 step */
+  ISLOW_MULT_TYPE * quantptr;  /* cursor into dct_table, moves with inptr */
+  int * wsptr;  /* cursor into workspace (column in pass 1, row in pass 2) */
+  JSAMPROW outptr;  /* current output row, already offset by output_col */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;  /* column counter in pass 1, row counter in pass 2 */
+  int workspace[8*5];	/* 5 rows of 8 ints; buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient rows 0, 2, 4 -> tmp10, tmp11, tmp12 */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp12 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;  /* tmp12 alone feeds workspace row 2 */
+
+    /* Odd part: coefficient rows 1, 3 -> tmp13, tmp14 */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
+    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
+    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
+
+    /* Final output stage: rows k and 4-k share terms; row 2 is even-only */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 5 rows from work array, store into output array.
+   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
+   */
+
+  wsptr = workspace;  /* rewind to the first workspace row */
+  for (ctr = 0; ctr < 5; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0, 2, 4, 6 of this row -> tmp20..tmp24 */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z3 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part: workspace entries 1, 3, 5, 7 of this row -> tmp10..tmp14 */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z3 <<= CONST_BITS;  /* entry 5 is used only in adds/subtracts below */
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z3 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage: outptr[k] = even + odd, outptr[9-k] = even - odd */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing an 8x4 output block: 8 samples in each of the 4 rows
+ * output_buf[0..3], written starting at column output_col.
+ * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*4];	/* pass 1 results: 4 rows of 8, read by pass 2 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process the 8 columns from input, store into work array.
+   * 4-point IDCT kernel using coefficient rows 0..3 of each column;
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient rows 0 and 2, scaled up by PASS1_BITS */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+
+    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
+    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
+
+    /* Odd part: coefficient rows 1 and 3 */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
+    /* Add fudge factor here for the descale done by the RIGHT_SHIFTs below. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
+		       CONST_BITS-PASS1_BITS);
+    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
+		       CONST_BITS-PASS1_BITS);
+
+    /* Final output stage: both parts were scaled above, so no shift here */
+
+    wsptr[8*0] = (int) (tmp10 + tmp0);
+    wsptr[8*3] = (int) (tmp10 - tmp0);
+    wsptr[8*1] = (int) (tmp12 + tmp2);
+    wsptr[8*2] = (int) (tmp12 - tmp2);
+  }
+
+  /* Pass 2: process the 4 rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).  Inputs are wsptr[0], [4], [2] and [6].
+     */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z2 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 = (INT32) wsptr[4];
+
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = (INT32) wsptr[7];
+    tmp1 = (INT32) wsptr[5];
+    tmp2 = (INT32) wsptr[3];
+    tmp3 = (INT32) wsptr[1];
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3; outputs k and 7-k are the sum/difference of one pair */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next workspace row (8 entries wide) */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 6x3 output block: 6 samples in each of the
+ * 3 rows output_buf[0..2], written starting at column output_col.
+ * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[6*3];	/* pass 1 results: 3 rows of 6, read by pass 2 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process the 6 columns from input, store into work array.
+   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient rows 0 and 2 */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for the descale done by the RIGHT_SHIFTs below. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part: coefficient row 1 */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage: workspace rows 0 and 2 mirror each other, row 1 has no odd term */
+
+    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
+  }
+  
+  /* Pass 2: process 3 rows from work array, store into output array.
+   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0, 4 and 2 */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[4];
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = tmp0 - tmp10 - tmp10;
+    tmp10 = (INT32) wsptr[2];
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part: workspace entries 1, 3 and 5 */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << CONST_BITS;
+
+    /* Final output stage: outputs k and 5-k are the sum/difference of one even/odd pair */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 6;		/* advance pointer to next workspace row (6 entries wide) */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 4x2 output block: 4 samples in each of the 2 rows
+ * output_buf[0..1], written starting at column output_col.
+ * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  INT32 * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  INT32 workspace[4*2];	/* pass 1 results: 2 rows of 4, read by pass 2 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process the 4 columns from input, store into work array (sum/difference only, no scaling shift). */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient row 0 */
+
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+
+    /* Odd part: coefficient row 1 */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+    /* Final output stage: stored unshifted, which is why pass 2 descales by CONST_BITS+3 only */
+
+    wsptr[4*0] = tmp10 + tmp0;
+    wsptr[4*1] = tmp10 - tmp0;
+  }
+
+  /* Pass 2: process 2 rows from work array, store into output array.
+   * 4-point IDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 2; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0 and 2 */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
+    tmp2 = wsptr[2];
+
+    tmp10 = (tmp0 + tmp2) << CONST_BITS;
+    tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+    /* Odd part: workspace entries 1 and 3 */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = wsptr[1];
+    z3 = wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
+    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+    /* Final output stage: outputs k and 3-k are the sum/difference of one even/odd pair */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 4;		/* advance pointer to next workspace row (4 entries wide) */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 2x1 output block: 2 samples in the single row
+ * output_buf[0], written starting at column output_col.
+ * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  DCTELEM tmp0, tmp1;
+  ISLOW_MULT_TYPE * quantptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  ISHIFT_TEMPS
+
+  /* Pass 1: empty (no column processing and no workspace are used here). */
+
+  /* Pass 2: process 1 row from input, store into output array. */
+
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  outptr = output_buf[0] + output_col;
+
+  /* Even part: coefficient 0 */
+
+  tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
+  /* Add range center and fudge factor (1 << 2, half of the final 1 << 3 divisor) for final descale and range-limit. */
+  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
+
+  /* Odd part: coefficient 1 */
+
+  tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
+
+  /* Final output stage: sum and difference, descaled by 3 bits, masked and range-limited */
+
+  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
+  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing an 8x16 output block: 8 samples in each of the 16 rows
+ * output_buf[0..15], written starting at column output_col.
+ * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*16];	/* pass 1 results: 16 rows of 8, read by pass 2 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 8 columns from input, store 16 values per column into
+   * work array.  16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient rows 0, 4, 2 and 6; results in tmp20..tmp27 */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for the descale done by the RIGHT_SHIFTs below. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part: coefficient rows 1, 3, 5 and 7; results in tmp0..tmp3, tmp10..tmp13 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage: workspace rows k and 15-k are the sum/difference of one even/odd pair */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process the 16 rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 16; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).  Inputs are wsptr[0], [4], [2] and [6].
+     */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z2 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 = (INT32) wsptr[4];
+
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = (INT32) wsptr[7];
+    tmp1 = (INT32) wsptr[5];
+    tmp2 = (INT32) wsptr[3];
+    tmp3 = (INT32) wsptr[1];
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3; outputs k and 7-k are the sum/difference of one pair */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next workspace row (8 entries wide) */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 7x14 output block: 7 samples in each of the 14 rows
+ * output_buf[0..13], written starting at column output_col.
+ * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[7*14];	/* pass 1 results: 14 rows of 7, read by pass 2 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 7 columns from input, store 14 values per column into
+   * work array.  14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient rows 0, 4, 2 and 6; results in tmp20..tmp26 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for the descale done by the RIGHT_SHIFTs below. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
+			CONST_BITS-PASS1_BITS);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part: coefficient rows 1, 3, 5 and 7; results in tmp10..tmp16 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp13 = z4 << CONST_BITS;
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
+    tmp16 += tmp15;
+    z1    += z4;
+    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
+    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
+    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
+    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
+
+    tmp13 = (z1 - z3) << PASS1_BITS;
+
+    /* Final output stage: rows k and 13-k pair up; tmp23 and tmp13 were already scaled above, so rows 3 and 10 need no shift */
+
+    wsptr[7*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[7*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[7*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[7*3]  = (int) (tmp23 + tmp13);
+    wsptr[7*10] = (int) (tmp23 - tmp13);
+    wsptr[7*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[7*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[7*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[7*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[7*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[7*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 14 rows from work array, store into output array.
+   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 14; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0, 2, 4 and 6; results in tmp20..tmp23 */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp23 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
+    tmp23 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
+    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
+    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp10 = z1 + z3;
+    z2 -= tmp10;
+    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
+    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
+    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
+    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
+
+    /* Odd part: workspace entries 1, 3 and 5; results in tmp10..tmp12 */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
+    tmp10 = tmp11 - tmp12;
+    tmp11 += tmp12;
+    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
+    tmp11 += tmp12;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
+    tmp10 += z2;
+    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
+
+    /* Final output stage: outputs k and 6-k pair up; the middle output 3 uses the even term tmp23 alone */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 7;		/* advance pointer to next workspace row (7 entries wide) */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 6x12 output block: 6 samples in each of the 12 rows
+ * output_buf[0..11], written starting at column output_col.
+ * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[6*12];	/* pass 1 results: 12 rows of 6, read by pass 2 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process 6 columns from input, store 12 values per column into
+   * work array.  12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient rows 0, 4, 2 and 6; results in tmp20..tmp25 */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for the descale done by the RIGHT_SHIFTs below. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part: coefficient rows 1, 3, 5 and 7; results in tmp10..tmp15 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage: workspace rows k and 11-k are the sum/difference of one even/odd pair */
+
+    wsptr[6*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[6*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[6*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[6*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[6*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[6*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[6*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[6*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[6*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[6*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 12 rows from work array, store into output array.
+   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 12; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0, 4 and 2; results in tmp20..tmp22 */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp10 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
+    tmp10 <<= CONST_BITS;
+    tmp12 = (INT32) wsptr[4];
+    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
+    tmp11 = tmp10 + tmp20;
+    tmp21 = tmp10 - tmp20 - tmp20;
+    tmp20 = (INT32) wsptr[2];
+    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
+    tmp20 = tmp11 + tmp10;
+    tmp22 = tmp11 - tmp10;
+
+    /* Odd part: workspace entries 1, 3 and 5; results in tmp10..tmp12 */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
+    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
+    tmp11 = (z1 - z2 - z3) << CONST_BITS;
+
+    /* Final output stage: outputs k and 5-k are the sum/difference of one even/odd pair */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 6;		/* advance pointer to next workspace row (6 entries wide) */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 5x10 output block (5 samples per row, 10 rows).
+ *
+ * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
+  INT32 z1, z2, z3, z4, z5;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[5*10];	/* buffers data between passes: 10 rows of 5 values */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
+   * Only columns 0..4 are visited; each reads inptr[DCTSIZE*k], k = 0..7. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
+			CONST_BITS-PASS1_BITS);  /* tmp22 is descaled here already */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part (tmp10..tmp14 are reused; the even part now lives in tmp20..tmp24) */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+    z5 = z3 << CONST_BITS;
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z5 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;  /* scaled by PASS1_BITS only, to pair with tmp22 */
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage: row r of this column is stored at wsptr[5*r] */
+
+    wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[5*2] = (int) (tmp22 + tmp12);  /* no shift: both terms were scaled above */
+    wsptr[5*7] = (int) (tmp22 - tmp12);  /* no shift: both terms were scaled above */
+    wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 10 rows from work array, store into output array.
+   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
+   * Row ctr is written to output_buf[ctr][output_col .. output_col+4]. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 10; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp12 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
+    tmp12 <<= CONST_BITS;
+    tmp13 = (INT32) wsptr[2];
+    tmp14 = (INT32) wsptr[4];
+    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;  /* tmp12 alone feeds the middle sample, outptr[2] */
+
+    /* Odd part */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
+    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
+    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
+
+    /* Final output stage: descale, mask with RANGE_MASK, look up in range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 5;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 4x8 output block (4 samples per row, 8 rows).
+ *
+ * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[4*8];	/* buffers data between passes: 8 rows of 4 values */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * Note results are scaled up by sqrt(8) compared to a true IDCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   * Only the first 4 input columns are visited. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 4; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero: all 8 rows of this column get the scaled DC value */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+
+      wsptr[4*0] = dcval;
+      wsptr[4*1] = dcval;
+      wsptr[4*2] = dcval;
+      wsptr[4*3] = dcval;
+      wsptr[4*4] = dcval;
+      wsptr[4*5] = dcval;
+      wsptr[4*6] = dcval;
+      wsptr[4*7] = dcval;
+
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 <<= CONST_BITS;
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    tmp0 = z2 + z3;
+    tmp1 = z2 - z3;
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     * tmp0..tmp3 are reused here; the even part is kept in tmp10..tmp13. */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+
+  /* Pass 2: process 8 rows from work array, store into output array.
+   * 4-point IDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+   * Row ctr is written to output_buf[ctr][output_col .. output_col+3]. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp2 = (INT32) wsptr[2];
+
+    tmp10 = (tmp0 + tmp2) << CONST_BITS;
+    tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
+    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+    /* Final output stage: descale, mask with RANGE_MASK, look up in range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 4;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 3x6 output block (3 samples per row, 6 rows).
+ *
+ * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[3*6];	/* buffers data between passes: 6 rows of 3 values */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   * Only columns 0..2 are visited; each reads inptr[DCTSIZE*k], k = 0..5. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); /* descaled here already */
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part (tmp0..tmp2 are reused; the even part is kept in tmp10..tmp12) */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << PASS1_BITS;  /* scaled by PASS1_BITS only, to pair with tmp11 */
+
+    /* Final output stage: row r of this column is stored at wsptr[3*r] */
+
+    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*1] = (int) (tmp11 + tmp1);  /* no shift: both terms were scaled above */
+    wsptr[3*4] = (int) (tmp11 - tmp1);  /* no shift: both terms were scaled above */
+    wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 6 rows from work array, store into output array.
+   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
+   * Row ctr is written to output_buf[ctr][output_col .. output_col+2]. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[2];
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;  /* tmp2 alone feeds the middle sample, outptr[1] */
+
+    /* Odd part */
+
+    tmp12 = (INT32) wsptr[1];
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage: descale, mask with RANGE_MASK, look up in range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 3;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 2x4 output block (2 samples per row, 4 rows).
+ *
+ * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  INT32 * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  INT32 workspace[2*4];	/* buffers data between passes: 4 rows of 2 values */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 4-point IDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+   * Results are stored without a descaling shift; pass 2 does the only descale. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+
+    tmp10 = (tmp0 + tmp2) << CONST_BITS;
+    tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
+    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+    /* Final output stage: row r of this column is stored at wsptr[2*r] */
+
+    wsptr[2*0] = tmp10 + tmp0;
+    wsptr[2*3] = tmp10 - tmp0;
+    wsptr[2*1] = tmp12 + tmp2;
+    wsptr[2*2] = tmp12 - tmp2;
+  }
+
+  /* Pass 2: process 4 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp10 = wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
+	       (ONE << (CONST_BITS+2)));
+
+    /* Odd part */
+
+    tmp0 = wsptr[1];
+
+    /* Final output stage: descale by CONST_BITS+3, mask, look up in range_limit */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 2;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 1x2 output block (one sample in each of 2 rows).
+ *
+ * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  DCTELEM tmp0, tmp1;
+  ISLOW_MULT_TYPE * quantptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  ISHIFT_TEMPS
+
+  /* Process 1 column from input, store into output array.  No workspace needed. */
+
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+
+  /* Even part: uses only coef_block[DCTSIZE*0] */
+
+  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
+  /* Add range center and fudge factor for final descale and range-limit. */
+  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
+
+  /* Odd part: uses only coef_block[DCTSIZE*1] */
+
+  tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+  /* Final output stage: descale by 3 bits, mask, look up in range_limit */
+
+  output_buf[0][output_col] =
+    range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
+  output_buf[1][output_col] =
+    range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
+}
+
+#endif /* IDCT_SCALING_SUPPORTED */
+#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/libraries/jpeg/jinclude.h b/libraries/jpeg/jinclude.h
new file mode 100644
index 000000000..20ed4ef11
--- /dev/null
+++ b/libraries/jpeg/jinclude.h
@@ -0,0 +1,97 @@
+/*
+ * jinclude.h
+ *
+ * Copyright (C) 1991-1994, Thomas G. Lane.
+ * Modified 2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file exists to provide a single place to fix any problems with
+ * including the wrong system include files.  (Common problems are taken
+ * care of by the standard jconfig symbols, but on really weird systems
+ * you may have to edit this file.)
+ *
+ * NOTE: this file is NOT intended to be included by applications using the
+ * JPEG library.  Most applications need only include jpeglib.h.
+ */
+
+
+/* Include auto-config file to find out which system include files we need. */
+
+#include "jconfig.h"		/* auto configuration options */
+#define JCONFIG_INCLUDED	/* so that jpeglib.h doesn't do it again */
+
+/*
+ * We need the NULL macro and size_t typedef.
+ * On an ANSI-conforming system it is sufficient to include <stddef.h>.
+ * Otherwise, we get them from <stdlib.h> or <stdio.h>; we may have to
+ * pull in <sys/types.h> as well.
+ * Note that the core JPEG library does not require <stdio.h>;
+ * only the default error handler and data source/destination modules do.
+ * But we must pull it in because of the references to FILE in jpeglib.h.
+ * You can remove those references if you want to compile without <stdio.h>.
+ */
+
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
+
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#ifdef NEED_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#include <stdio.h>
+
+/*
+ * We need memory copying and zeroing functions, plus strncpy().
+ * ANSI and System V implementations declare these in <string.h>.
+ * BSD doesn't have the mem() functions, but it does have bcopy()/bzero().
+ * Some systems may declare memset and memcpy in <memory.h>.
+ * MEMCOPY is always (dest,src,size); the BSD branch reorders for bcopy(src,dest).
+ * NOTE: we assume the size parameters to these functions are of type size_t.
+ * Change the casts in these macros if not!
+ */
+
+#ifdef NEED_BSD_STRINGS
+
+#include <strings.h>
+#define MEMZERO(target,size)	bzero((void *)(target), (size_t)(size))
+#define MEMCOPY(dest,src,size)	bcopy((const void *)(src), (void *)(dest), (size_t)(size))
+
+#else /* not BSD, assume ANSI/SysV string lib */
+
+#include <string.h>
+#define MEMZERO(target,size)	memset((void *)(target), 0, (size_t)(size))
+#define MEMCOPY(dest,src,size)	memcpy((void *)(dest), (const void *)(src), (size_t)(size))
+
+#endif /* NEED_BSD_STRINGS */
+
+/*
+ * In ANSI C, and indeed any rational implementation, size_t is also the
+ * type returned by sizeof().  However, it seems there are some irrational
+ * implementations out there, in which sizeof() returns an int even though
+ * size_t is defined as long or unsigned long.  To ensure consistent results
+ * we always use this SIZEOF() macro in place of using sizeof() directly.
+ */
+
+#define SIZEOF(object)	((size_t) sizeof(object))	/* sizeof result cast to size_t */
+
+/*
+ * The modules that use fread() and fwrite() always invoke them through
+ * these macros.  On some systems you may need to twiddle the argument casts.
+ * CAUTION: argument order is different from underlying functions!
+ * (The FILE comes first here, and the element size passed on is always 1.)
+ * Furthermore, macros are provided for fflush() and ferror() in order
+ * to facilitate adaptation by applications that use their own FILE class.
+ */
+
+#define JFREAD(file,buf,sizeofbuf)  \
+  ((size_t) fread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
+#define JFWRITE(file,buf,sizeofbuf)  \
+  ((size_t) fwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
+#define JFFLUSH(file)	fflush(file)
+#define JFERROR(file)	ferror(file)
diff --git a/libraries/jpeg/jmemansi.c b/libraries/jpeg/jmemansi.c
new file mode 100644
index 000000000..2d93e4962
--- /dev/null
+++ b/libraries/jpeg/jmemansi.c
@@ -0,0 +1,167 @@
+/*
+ * jmemansi.c
+ *
+ * Copyright (C) 1992-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file provides a simple generic implementation of the system-
+ * dependent portion of the JPEG memory manager.  This implementation
+ * assumes that you have the ANSI-standard library routine tmpfile().
+ * Also, the problem of determining the amount of memory available
+ * is shoved onto the user.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"		/* import the system-dependent declarations */
+
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
+extern void * malloc JPP((size_t size));
+extern void free JPP((void *ptr));
+#endif
+
+#ifndef SEEK_SET		/* pre-ANSI systems may not define this; */
+#define SEEK_SET  0		/* if not, assume 0 is correct */
+#endif
+
+
+/*
+ * Memory allocation and freeing are controlled by the regular library
+ * routines malloc() and free().  The cinfo argument is not used.
+ */
+
+GLOBAL(void *)
+jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void *) malloc(sizeofobject);	/* malloc result is passed back unchecked */
+}
+
+GLOBAL(void)	/* release a block obtained from jpeg_get_small */
+jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
+{
+  free(object);	/* cinfo and sizeofobject are not used */
+}
+
+
+/*
+ * "Large" objects are treated the same as "small" ones: plain malloc().
+ * NB: although we include FAR keywords in the routine declarations,
+ * this file won't actually work in 80x86 small/medium model; at least,
+ * you probably won't be able to process useful-size images in only 64KB.
+ */
+
+GLOBAL(void FAR *)
+jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void FAR *) malloc(sizeofobject);	/* malloc result is passed back unchecked */
+}
+
+GLOBAL(void)	/* release a block obtained from jpeg_get_large */
+jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
+{
+  free(object);	/* cinfo and sizeofobject are not used */
+}
+
+
+/*
+ * This routine computes the total memory space available for allocation.
+ * It's impossible to do this in a portable way; our current solution is
+ * to make the user tell us (with a default value set at compile time).
+ * If you can actually get the available space, it's a good idea to subtract
+ * a slop factor of 5% or so.
+ */
+
+#ifndef DEFAULT_MAX_MEM		/* so can override from makefile */
+#define DEFAULT_MAX_MEM		1000000L /* default: one megabyte */
+#endif
+
+GLOBAL(long)
+jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
+		    long max_bytes_needed, long already_allocated)
+{
+  return cinfo->mem->max_memory_to_use - already_allocated;	/* min/max_bytes_needed are not consulted */
+}
+
+
+/*
+ * Backing store (temporary file) management.
+ * Backing store objects are only used when the value returned by
+ * jpeg_mem_available is less than the total space needed.  You can dispense
+ * with these routines if you have plenty of virtual memory; see jmemnobs.c.
+ */
+
+
+METHODDEF(void)	/* read byte_count bytes at file_offset of the temp file */
+read_backing_store (j_common_ptr cinfo, backing_store_ptr info,
+		    void FAR * buffer_address,
+		    long file_offset, long byte_count)
+{
+  size_t wanted = (size_t) byte_count;	/* bytes the caller asked for */
+  if (fseek(info->temp_file, file_offset, SEEK_SET) != 0)
+    ERREXIT(cinfo, JERR_TFILE_SEEK);	/* could not position the temp file */
+  if (wanted != JFREAD(info->temp_file, buffer_address, byte_count))
+    ERREXIT(cinfo, JERR_TFILE_READ);	/* fewer bytes read than requested */
+}
+
+
+METHODDEF(void)	/* write byte_count bytes at file_offset of the temp file */
+write_backing_store (j_common_ptr cinfo, backing_store_ptr info,
+		     void FAR * buffer_address,
+		     long file_offset, long byte_count)
+{
+  size_t wanted = (size_t) byte_count;	/* bytes the caller wants stored */
+  if (fseek(info->temp_file, file_offset, SEEK_SET) != 0)
+    ERREXIT(cinfo, JERR_TFILE_SEEK);	/* could not position the temp file */
+  if (wanted != JFWRITE(info->temp_file, buffer_address, byte_count))
+    ERREXIT(cinfo, JERR_TFILE_WRITE);	/* fewer bytes written than requested */
+}
+
+
+METHODDEF(void)	/* close the temp file behind a backing-store object */
+close_backing_store (j_common_ptr cinfo, backing_store_ptr info)
+{
+  fclose(info->temp_file);	/* return value is not checked; cinfo is unused */
+  /* Since this implementation uses tmpfile() to create the file,
+   * no explicit file deletion is needed.
+   */
+}
+
+
+/*
+ * Initial opening of a backing-store object.
+ *
+ * This version uses tmpfile(), which constructs a suitable file name
+ * behind the scenes.  We don't have to use info->temp_name[] at all;
+ * indeed, we can't even find out the actual name of the temp file.
+ */
+
+GLOBAL(void)	/* create the temp file and install the three access methods */
+jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
+			 long total_bytes_needed)
+{
+  if ((info->temp_file = tmpfile()) == NULL)	/* total_bytes_needed is not used */
+    ERREXITS(cinfo, JERR_TFILE_CREATE, "");	/* empty string: no file name is known */
+  info->read_backing_store = read_backing_store;
+  info->write_backing_store = write_backing_store;
+  info->close_backing_store = close_backing_store;
+}
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.
+ */
+
+GLOBAL(long)	/* no setup is done here; cinfo is unused */
+jpeg_mem_init (j_common_ptr cinfo)
+{
+  return DEFAULT_MAX_MEM;	/* default for max_memory_to_use */
+}
+
+GLOBAL(void)	/* counterpart of jpeg_mem_init; cinfo is unused */
+jpeg_mem_term (j_common_ptr cinfo)
+{
+  /* no work: jpeg_mem_init acquired nothing that needs releasing */
+}
diff --git a/libraries/jpeg/jmemmgr.c b/libraries/jpeg/jmemmgr.c
new file mode 100644
index 000000000..0a137cdde
--- /dev/null
+++ b/libraries/jpeg/jmemmgr.c
@@ -0,0 +1,1119 @@
+/*
+ * jmemmgr.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2011-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the JPEG system-independent memory management
+ * routines.  This code is usable across a wide variety of machines; most
+ * of the system dependencies have been isolated in a separate file.
+ * The major functions provided here are:
+ *   * pool-based allocation and freeing of memory;
+ *   * policy decisions about how to divide available memory among the
+ *     virtual arrays;
+ *   * control logic for swapping virtual arrays between main memory and
+ *     backing storage.
+ * The separate system-dependent file provides the actual backing-storage
+ * access code, and it contains the policy decision about how much total
+ * main memory to use.
+ * This file is system-dependent in the sense that some of its functions
+ * are unnecessary in some systems.  For example, if there is enough virtual
+ * memory so that backing storage will never be used, much of the virtual
+ * array control logic could be removed.  (Of course, if you have that much
+ * memory then you shouldn't care about a little bit of unused code...)
+ */
+
+#define JPEG_INTERNALS
+#define AM_MEMORY_MANAGER	/* we define jvirt_Xarray_control structs */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"		/* import the system-dependent declarations */
+
+#ifndef NO_GETENV		/* nothing is declared when NO_GETENV is defined */
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare getenv() */
+extern char * getenv JPP((const char * name));	/* fallback declaration */
+#endif /* HAVE_STDLIB_H */
+#endif /* NO_GETENV */
+
+
+/*
+ * Some important notes:
+ *   The allocation routines provided here must never return NULL.
+ *   They should exit to error_exit if unsuccessful.
+ *
+ *   It's not a good idea to try to merge the sarray and barray routines,
+ *   even though they are textually almost the same, because samples are
+ *   usually stored as bytes while coefficients are shorts or ints.  Thus,
+ *   in machines where byte pointers have a different representation from
+ *   word pointers, the resulting machine code could not be the same.
+ */
+
+
+/*
+ * Many machines require storage alignment: longs must start on 4-byte
+ * boundaries, doubles on 8-byte boundaries, etc.  On such machines, malloc()
+ * always returns pointers that are multiples of the worst-case alignment
+ * requirement, and we had better do so too.
+ * There isn't any really portable way to determine the worst-case alignment
+ * requirement.  This module assumes that aligning everything to multiples
+ * of sizeof(ALIGN_TYPE) is sufficient to satisfy that requirement.
+ * By default, we define ALIGN_TYPE as double.  This is necessary on some
+ * workstations (where doubles really do need 8-byte alignment) and will work
+ * fine on nearly everything.  If your machine has lesser alignment needs,
+ * you can save a few bytes by making ALIGN_TYPE smaller.
+ * The only place I know of where this will NOT work is certain Macintosh
+ * 680x0 compilers that define double as a 10-byte IEEE extended float.
+ * Doing 10-byte alignment is counterproductive because longwords won't be
+ * aligned well.  Put "#define ALIGN_TYPE long" in jconfig.h if you have
+ * such a compiler.
+ */
+
+#ifndef ALIGN_TYPE		/* so can override from jconfig.h */
+#define ALIGN_TYPE  double	/* default unit for rounding object sizes */
+#endif /* ALIGN_TYPE */
+
+
+/*
+ * We allocate objects from "pools", where each pool is gotten with a single
+ * request to jpeg_get_small() or jpeg_get_large().  There is no per-object
+ * overhead within a pool, except for alignment padding.  Each pool has a
+ * header with a link to the next pool of the same class.
+ * Small and large pool headers are identical except that the latter's
+ * link pointer must be FAR on 80x86 machines.
+ * Notice that the "real" header fields are union'ed with a dummy ALIGN_TYPE
+ * field.  This forces the compiler to make SIZEOF(small_pool_hdr) a multiple
+ * of the alignment requirement of ALIGN_TYPE.
+ */
+
+typedef union small_pool_struct * small_pool_ptr; /* link type for small pools */
+
+typedef union small_pool_struct {	/* header placed at the start of a pool */
+  struct {
+    small_pool_ptr next;	/* next in list of pools; NULL ends the list */
+    size_t bytes_used;		/* how many bytes already used within pool */
+    size_t bytes_left;		/* bytes still available in this pool */
+  } hdr;
+  ALIGN_TYPE dummy;		/* never accessed; in union to ensure alignment */
+} small_pool_hdr;
+
+typedef union large_pool_struct FAR * large_pool_ptr; /* FAR link for large pools */
+
+typedef union large_pool_struct {	/* header placed at the start of a pool */
+  struct {
+    large_pool_ptr next;	/* next in list of pools; NULL ends the list */
+    size_t bytes_used;		/* how many bytes already used within pool */
+    size_t bytes_left;		/* bytes still available in this pool */
+  } hdr;
+  ALIGN_TYPE dummy;		/* never accessed; in union to ensure alignment */
+} large_pool_hdr;
+
+
+/*
+ * Here is the full definition of a memory manager object.
+ */
+
+typedef struct {
+  struct jpeg_memory_mgr pub;	/* public fields */
+
+  /* Each pool identifier (lifetime class) names a linked list of pools. */
+  small_pool_ptr small_list[JPOOL_NUMPOOLS];	/* list heads, by pool id */
+  large_pool_ptr large_list[JPOOL_NUMPOOLS];	/* list heads, by pool id */
+
+  /* Since we only have one lifetime class of virtual arrays, only one
+   * linked list is necessary (for each datatype).  Note that the virtual
+   * array control blocks being linked together are actually stored somewhere
+   * in the small-pool list.
+   */
+  jvirt_sarray_ptr virt_sarray_list;	/* most recently requested first */
+  jvirt_barray_ptr virt_barray_list;	/* most recently requested first */
+
+  /* This counts total space obtained from jpeg_get_small/large */
+  long total_space_allocated;
+
+  /* alloc_sarray and alloc_barray set this value for use by virtual
+   * array routines.
+   */
+  JDIMENSION last_rowsperchunk;	/* from most recent alloc_sarray/barray */
+} my_memory_mgr;
+
+typedef my_memory_mgr * my_mem_ptr;	/* what cinfo->mem is cast to here */
+
+
+/*
+ * The control blocks for virtual arrays.
+ * Note that these blocks are allocated in the "small" pool area.
+ * System-dependent info for the associated backing store (if any) is hidden
+ * inside the backing_store_info struct.
+ */
+
+struct jvirt_sarray_control {
+  JSAMPARRAY mem_buffer;	/* => the in-memory buffer; NULL until realized */
+  JDIMENSION rows_in_array;	/* total virtual array height */
+  JDIMENSION samplesperrow;	/* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;		/* max rows accessed by access_virt_sarray */
+  JDIMENSION rows_in_mem;	/* height of memory buffer */
+  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
+  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
+  boolean pre_zero;		/* pre-zero mode requested? */
+  boolean dirty;		/* do current buffer contents need to be written? */
+  boolean b_s_open;		/* is backing-store data valid? */
+  jvirt_sarray_ptr next;	/* link to next virtual sarray control block */
+  backing_store_info b_s_info;	/* System-dependent control info */
+};
+
+struct jvirt_barray_control {
+  JBLOCKARRAY mem_buffer;	/* => the in-memory buffer; NULL until realized */
+  JDIMENSION rows_in_array;	/* total virtual array height */
+  JDIMENSION blocksperrow;	/* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;		/* max rows accessed by access_virt_barray */
+  JDIMENSION rows_in_mem;	/* height of memory buffer */
+  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
+  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
+  boolean pre_zero;		/* pre-zero mode requested? */
+  boolean dirty;		/* do current buffer contents need to be written? */
+  boolean b_s_open;		/* is backing-store data valid? */
+  jvirt_barray_ptr next;	/* link to next virtual barray control block */
+  backing_store_info b_s_info;	/* System-dependent control info */
+};
+
+
+#ifdef MEM_STATS		/* optional extra stuff for statistics */
+
+LOCAL(void)
+print_mem_stats (j_common_ptr cinfo, int pool_id)
+/* Debug aid: dump the allocation figures of one pool class to stderr */
+{
+  my_mem_ptr mgr = (my_mem_ptr) cinfo->mem;
+  large_pool_ptr large_hdr = mgr->large_list[pool_id];
+  small_pool_ptr small_hdr = mgr->small_list[pool_id];
+
+  /* This is only a debugging stub, so it cheats a little and writes with
+   * fprintf directly instead of going through the trace message code,
+   * whose message parm array can't handle longs.
+   */
+  fprintf(stderr, "Freeing pool %d, total space = %ld\n",
+	  pool_id, mgr->total_space_allocated);
+
+  /* Large pools come first; only their used byte count is printed. */
+  while (large_hdr != NULL) {
+    fprintf(stderr, "  Large chunk used %ld\n", (long) large_hdr->hdr.bytes_used);
+    large_hdr = large_hdr->hdr.next;
+  }
+  /* Then the small pools, with both used and free byte counts. */
+  while (small_hdr != NULL) {
+    fprintf(stderr, "  Small chunk used %ld free %ld\n",
+	    (long) small_hdr->hdr.bytes_used, (long) small_hdr->hdr.bytes_left);
+    small_hdr = small_hdr->hdr.next;
+  }
+}
+
+#endif /* MEM_STATS */
+
+
+LOCAL(noreturn_t)
+out_of_memory (j_common_ptr cinfo, int which)
+/* Raise JERR_OUT_OF_MEMORY, passing "which" along as the message parameter. */
+/* In MEM_STATS builds the trace level is raised first so stats get shown. */
+{
+#if defined(MEM_STATS)
+  cinfo->err->trace_level = 2;	/* force self_destruct to report stats */
+#endif /* MEM_STATS */
+  ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which);
+}
+
+
+/*
+ * Allocation of "small" objects.
+ *
+ * For these, we use pooled storage.  When a new pool must be created,
+ * we try to get enough space for the current request plus a "slop" factor,
+ * where the slop will be the amount of leftover space in the new pool.
+ * The speed vs. space tradeoff is largely determined by the slop values.
+ * A different slop value is provided for each pool class (lifetime),
+ * and we also distinguish the first pool of a class from later ones.
+ * NOTE: the values given work fairly well on both 16- and 32-bit-int
+ * machines, but may be too small if longs are 64 bits or more.
+ */
+
+static const size_t first_pool_slop[JPOOL_NUMPOOLS] = /* indexed by pool_id */
+{
+	1600,			/* first PERMANENT pool */
+	16000			/* first IMAGE pool */
+};
+
+static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = /* indexed by pool_id */
+{
+	0,			/* additional PERMANENT pools */
+	5000			/* additional IMAGE pools */
+};
+
+#define MIN_SLOP  50		/* greater than 0 to avoid futile looping */
+
+
+METHODDEF(void *)
+alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Allocate a "small" object from the pooled storage of class pool_id */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr pool;		/* pool the object is carved from */
+  small_pool_ptr tail = NULL;	/* last pool visited; NULL if none */
+  size_t excess, needed, slop;
+  char * obj;
+
+  /* Refuse requests that cannot fit in one chunk together with a pool
+   * header; checking this first keeps the arithmetic below from
+   * overflowing.
+   */
+  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(small_pool_hdr)))
+    out_of_memory(cinfo, 1);	/* request exceeds malloc's ability */
+
+  /* Pad the size up to the next multiple of SIZEOF(ALIGN_TYPE) */
+  excess = sizeofobject % SIZEOF(ALIGN_TYPE);
+  if (excess != 0)
+    sizeofobject += SIZEOF(ALIGN_TYPE) - excess;
+
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* Walk the class's pool list until some pool has enough room left */
+  for (pool = mem->small_list[pool_id]; pool != NULL;
+       pool = pool->hdr.next) {
+    if (pool->hdr.bytes_left >= sizeofobject)
+      break;			/* this pool can hold the object */
+    tail = pool;
+  }
+
+  if (pool == NULL) {		/* nothing suitable: make a new pool */
+    /* needed is what must be obtained now, slop is what will be left over;
+     * the first pool of a class has its own slop value. */
+    needed = sizeofobject + SIZEOF(small_pool_hdr);
+    slop = (tail == NULL) ? first_pool_slop[pool_id]
+			  : extra_pool_slop[pool_id];
+    /* Don't ask for more than MAX_ALLOC_CHUNK */
+    if (slop > (size_t) (MAX_ALLOC_CHUNK-needed))
+      slop = (size_t) (MAX_ALLOC_CHUNK-needed);
+    /* Keep halving the slop until the request succeeds or slop runs out */
+    while ((pool = (small_pool_ptr) jpeg_get_small(cinfo, needed + slop))
+	   == NULL) {
+      slop /= 2;
+      if (slop < MIN_SLOP)	/* give up when it gets real small */
+	out_of_memory(cinfo, 2); /* jpeg_get_small failed */
+    }
+    mem->total_space_allocated += needed + slop;
+    /* Fill in the header of the new pool, then append it to the list */
+    pool->hdr.bytes_left = sizeofobject + slop;
+    pool->hdr.bytes_used = 0;
+    pool->hdr.next = NULL;
+    if (tail != NULL)
+      tail->hdr.next = pool;
+    else			/* first pool in class */
+      mem->small_list[pool_id] = pool;
+  }
+
+  /* Carve the object out of the chosen pool: data starts right after the
+   * header, and bytes_used is the offset of the first free byte.
+   */
+  obj = (char *) (pool + 1) + pool->hdr.bytes_used;
+  pool->hdr.bytes_left -= sizeofobject;
+  pool->hdr.bytes_used += sizeofobject;
+
+  return (void *) obj;
+}
+
+
+/*
+ * Allocation of "large" objects.
+ *
+ * The external semantics of these are the same as "small" objects,
+ * except that FAR pointers are used on 80x86.  However the pool
+ * management heuristics are quite different.  We assume that each
+ * request is large enough that it may as well be passed directly to
+ * jpeg_get_large; the pool management just links everything together
+ * so that we can free it all on demand.
+ * Note: the major use of "large" objects is in JSAMPARRAY and JBLOCKARRAY
+ * structures.  The routines that create these structures (see below)
+ * deliberately bunch rows together to ensure a large request size.
+ */
+
+METHODDEF(void FAR *)
+alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Allocate a "large" object; every request gets a pool of its own */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  large_pool_ptr pool;
+  size_t excess, total_bytes;
+
+  /* Refuse requests that cannot fit in one chunk together with a pool
+   * header; checking first keeps the arithmetic below from overflowing. */
+  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)))
+    out_of_memory(cinfo, 3);	/* request exceeds malloc's ability */
+
+  /* Pad the size up to the next multiple of SIZEOF(ALIGN_TYPE) */
+  excess = sizeofobject % SIZEOF(ALIGN_TYPE);
+  if (excess != 0)
+    sizeofobject += SIZEOF(ALIGN_TYPE) - excess;
+
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* Pass the request, plus room for the header, on to jpeg_get_large */
+  total_bytes = sizeofobject + SIZEOF(large_pool_hdr);
+  pool = (large_pool_ptr) jpeg_get_large(cinfo, total_bytes);
+  if (pool == NULL)
+    out_of_memory(cinfo, 4);	/* jpeg_get_large failed */
+  mem->total_space_allocated += total_bytes;
+
+  /* Push the new pool onto the front of the class's list.  The space
+   * counts are not needed for allocation; they are kept for statistics.
+   */
+  pool->hdr.bytes_used = sizeofobject;
+  pool->hdr.bytes_left = 0;
+  pool->hdr.next = mem->large_list[pool_id];
+  mem->large_list[pool_id] = pool;
+
+  return (void FAR *) (pool + 1); /* point to first data byte in pool */
+}
+
+
+/*
+ * Creation of 2-D sample arrays.
+ * The pointers are in near heap, the samples themselves in FAR heap.
+ *
+ * To minimize allocation overhead and to allow I/O of large contiguous
+ * blocks, we allocate the sample rows in groups of as many rows as possible
+ * without exceeding MAX_ALLOC_CHUNK total bytes per allocation request.
+ * NB: the virtual array control routines, later in this file, know about
+ * this chunking of rows.  The rowsperchunk value is left in the mem manager
+ * object so that it can be saved away if this sarray is the workspace for
+ * a virtual array.
+ */
+
+METHODDEF(JSAMPARRAY)
+alloc_sarray (j_common_ptr cinfo, int pool_id,
+	      JDIMENSION samplesperrow, JDIMENSION numrows)
+/* Allocate a 2-D sample array: row pointers small, sample rows large */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  JSAMPARRAY rowptrs;
+  JSAMPROW chunk;
+  JDIMENSION rowsperchunk, row, chunk_end;
+  long rows_fit;
+
+  /* How many whole rows fit in one allocation chunk beside its header? */
+  rows_fit = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
+	     ((long) samplesperrow * SIZEOF(JSAMPLE));
+  if (rows_fit <= 0)		/* not even one row fits */
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  rowsperchunk = numrows;
+  if (rows_fit < (long) numrows)
+    rowsperchunk = (JDIMENSION) rows_fit;
+  /* Leave the chunk size where the virtual array code can pick it up */
+  mem->last_rowsperchunk = rowsperchunk;
+
+  /* The row pointer list is a small object */
+  rowptrs = (JSAMPARRAY) alloc_small(cinfo, pool_id,
+				     (size_t) (numrows * SIZEOF(JSAMPROW)));
+
+  /* The rows come from large objects, rowsperchunk rows at a time */
+  row = 0;
+  while (row < numrows) {
+    if (numrows - row < rowsperchunk)	/* short final chunk */
+      rowsperchunk = numrows - row;
+    chunk = (JSAMPROW) alloc_large(cinfo, pool_id,
+	(size_t) ((size_t) rowsperchunk * (size_t) samplesperrow
+		  * SIZEOF(JSAMPLE)));
+    chunk_end = row + rowsperchunk;
+    for (; row < chunk_end; row++, chunk += samplesperrow)
+      rowptrs[row] = chunk;	/* rows lie back to back inside the chunk */
+  }
+
+  return rowptrs;
+}
+
+
+/*
+ * Creation of 2-D coefficient-block arrays.
+ * This is essentially the same as the code for sample arrays, above.
+ */
+
+METHODDEF(JBLOCKARRAY)
+alloc_barray (j_common_ptr cinfo, int pool_id,
+	      JDIMENSION blocksperrow, JDIMENSION numrows)
+/* Allocate a 2-D coefficient-block array: row pointers small, rows large */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  JBLOCKARRAY rowptrs;
+  JBLOCKROW chunk;
+  JDIMENSION rowsperchunk, row, chunk_end;
+  long rows_fit;
+
+  /* How many whole rows fit in one allocation chunk beside its header? */
+  rows_fit = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
+	     ((long) blocksperrow * SIZEOF(JBLOCK));
+  if (rows_fit <= 0)		/* not even one row fits */
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  rowsperchunk = numrows;
+  if (rows_fit < (long) numrows)
+    rowsperchunk = (JDIMENSION) rows_fit;
+  /* Leave the chunk size where the virtual array code can pick it up */
+  mem->last_rowsperchunk = rowsperchunk;
+
+  /* The row pointer list is a small object */
+  rowptrs = (JBLOCKARRAY) alloc_small(cinfo, pool_id,
+				      (size_t) (numrows * SIZEOF(JBLOCKROW)));
+
+  /* The rows come from large objects, rowsperchunk rows at a time */
+  row = 0;
+  while (row < numrows) {
+    if (numrows - row < rowsperchunk)	/* short final chunk */
+      rowsperchunk = numrows - row;
+    chunk = (JBLOCKROW) alloc_large(cinfo, pool_id,
+	(size_t) ((size_t) rowsperchunk * (size_t) blocksperrow
+		  * SIZEOF(JBLOCK)));
+    chunk_end = row + rowsperchunk;
+    for (; row < chunk_end; row++, chunk += blocksperrow)
+      rowptrs[row] = chunk;	/* rows lie back to back inside the chunk */
+  }
+
+  return rowptrs;
+}
+
+
+/*
+ * About virtual array management:
+ *
+ * The above "normal" array routines are only used to allocate strip buffers
+ * (as wide as the image, but just a few rows high).  Full-image-sized buffers
+ * are handled as "virtual" arrays.  The array is still accessed a strip at a
+ * time, but the memory manager must save the whole array for repeated
+ * accesses.  The intended implementation is that there is a strip buffer in
+ * memory (as high as is possible given the desired memory limit), plus a
+ * backing file that holds the rest of the array.
+ *
+ * The request_virt_array routines are told the total size of the image and
+ * the maximum number of rows that will be accessed at once.  The in-memory
+ * buffer must be at least as large as the maxaccess value.
+ *
+ * The request routines create control blocks but not the in-memory buffers.
+ * That is postponed until realize_virt_arrays is called.  At that time the
+ * total amount of space needed is known (approximately, anyway), so free
+ * memory can be divided up fairly.
+ *
+ * The access_virt_array routines are responsible for making a specific strip
+ * area accessible (after reading or writing the backing file, if necessary).
+ * Note that the access routines are told whether the caller intends to modify
+ * the accessed strip; during a read-only pass this saves having to rewrite
+ * data to disk.  The access routines are also responsible for pre-zeroing
+ * any newly accessed rows, if pre-zeroing was requested.
+ *
+ * In current usage, the access requests are usually for nonoverlapping
+ * strips; that is, successive access start_row numbers differ by exactly
+ * num_rows = maxaccess.  This means we can get good performance with simple
+ * buffer dump/reload logic, by making the in-memory buffer be a multiple
+ * of the access height; then there will never be accesses across bufferload
+ * boundaries.  The code will still work with overlapping access requests,
+ * but it doesn't handle bufferload overlaps very efficiently.
+ */
+
+
+METHODDEF(jvirt_sarray_ptr)
+request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
+		     JDIMENSION samplesperrow, JDIMENSION numrows,
+		     JDIMENSION maxaccess)
+/* Register a virtual 2-D sample array; no buffer is allocated here */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  jvirt_sarray_ptr ctl;
+
+  if (pool_id != JPOOL_IMAGE)	/* only IMAGE-lifetime arrays are supported */
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* The control block itself is a small object */
+  ctl = (jvirt_sarray_ptr) alloc_small(cinfo, pool_id,
+				       SIZEOF(struct jvirt_sarray_control));
+
+  /* Record the requested geometry and access parameters */
+  ctl->samplesperrow = samplesperrow;
+  ctl->rows_in_array = numrows;
+  ctl->maxaccess = maxaccess;
+  ctl->pre_zero = pre_zero;
+  ctl->mem_buffer = NULL;	/* marks array not yet realized */
+  ctl->b_s_open = FALSE;	/* no associated backing-store object */
+  /* Push the block onto the front of the manager's sarray list */
+  ctl->next = mem->virt_sarray_list;
+  mem->virt_sarray_list = ctl;
+  return ctl;
+}
+
+
+METHODDEF(jvirt_barray_ptr)
+request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
+		     JDIMENSION blocksperrow, JDIMENSION numrows,
+		     JDIMENSION maxaccess)
+/* Register a virtual 2-D coefficient-block array; no buffer allocated here */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  jvirt_barray_ptr ctl;
+
+  if (pool_id != JPOOL_IMAGE)	/* only IMAGE-lifetime arrays are supported */
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* The control block itself is a small object */
+  ctl = (jvirt_barray_ptr) alloc_small(cinfo, pool_id,
+				       SIZEOF(struct jvirt_barray_control));
+
+  /* Record the requested geometry and access parameters */
+  ctl->blocksperrow = blocksperrow;
+  ctl->rows_in_array = numrows;
+  ctl->maxaccess = maxaccess;
+  ctl->pre_zero = pre_zero;
+  ctl->mem_buffer = NULL;	/* marks array not yet realized */
+  ctl->b_s_open = FALSE;	/* no associated backing-store object */
+  /* Push the block onto the front of the manager's barray list */
+  ctl->next = mem->virt_barray_list;
+  mem->virt_barray_list = ctl;
+  return ctl;
+}
+
+
+METHODDEF(void)
+realize_virt_arrays (j_common_ptr cinfo)
+/* Allocate the in-memory buffers for any unrealized virtual arrays */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  long space_per_minheight, maximum_space, avail_mem;	/* byte counts */
+  long minheights, max_minheights;	/* counts of maxaccess-row units */
+  jvirt_sarray_ptr sptr;
+  jvirt_barray_ptr bptr;
+
+  /* Compute the minimum space needed (maxaccess rows in each buffer)
+   * and the maximum space needed (full image height in each buffer).
+   * These may be of use to the system-dependent jpeg_mem_available routine.
+   */
+  space_per_minheight = 0;
+  maximum_space = 0;
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      space_per_minheight += (long) sptr->maxaccess *
+			     (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+      maximum_space += (long) sptr->rows_in_array *
+		       (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+    }
+  }
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      space_per_minheight += (long) bptr->maxaccess *
+			     (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+      maximum_space += (long) bptr->rows_in_array *
+		       (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+    }
+  }
+
+  if (space_per_minheight <= 0)
+    return;			/* no unrealized arrays, no work */
+
+  /* Determine amount of memory to actually use; this is system-dependent. */
+  avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space,
+				 mem->total_space_allocated);
+
+  /* If the maximum space needed is available, make all the buffers full
+   * height; otherwise parcel it out with the same number of minheights
+   * in each buffer.
+   */
+  if (avail_mem >= maximum_space)
+    max_minheights = 1000000000L;	/* large enough not to limit any array */
+  else {
+    max_minheights = avail_mem / space_per_minheight;
+    /* If there doesn't seem to be enough space, try to get the minimum
+     * anyway.  This allows a "stub" implementation of jpeg_mem_available().
+     */
+    if (max_minheights <= 0)
+      max_minheights = 1;
+  }
+
+  /* Allocate the in-memory buffers and initialize backing store as needed. */
+
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L;
+      if (minheights <= max_minheights) {
+	/* Fits in memory (minheights = rows_in_array/maxaccess, rounded up) */
+	sptr->rows_in_mem = sptr->rows_in_array;
+      } else {
+	/* It doesn't fit in memory, create backing store. */
+	sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess);
+	jpeg_open_backing_store(cinfo, & sptr->b_s_info,
+				(long) sptr->rows_in_array *
+				(long) sptr->samplesperrow *
+				(long) SIZEOF(JSAMPLE));
+	sptr->b_s_open = TRUE;
+      }
+      sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
+				      sptr->samplesperrow, sptr->rows_in_mem);
+      sptr->rowsperchunk = mem->last_rowsperchunk; /* set by alloc_sarray */
+      sptr->cur_start_row = 0;	/* buffer window starts at the top row */
+      sptr->first_undef_row = 0;	/* no row has been defined yet */
+      sptr->dirty = FALSE;
+    }
+  }
+
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L;
+      if (minheights <= max_minheights) {
+	/* Fits in memory (minheights = rows_in_array/maxaccess, rounded up) */
+	bptr->rows_in_mem = bptr->rows_in_array;
+      } else {
+	/* It doesn't fit in memory, create backing store. */
+	bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess);
+	jpeg_open_backing_store(cinfo, & bptr->b_s_info,
+				(long) bptr->rows_in_array *
+				(long) bptr->blocksperrow *
+				(long) SIZEOF(JBLOCK));
+	bptr->b_s_open = TRUE;
+      }
+      bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE,
+				      bptr->blocksperrow, bptr->rows_in_mem);
+      bptr->rowsperchunk = mem->last_rowsperchunk; /* set by alloc_barray */
+      bptr->cur_start_row = 0;	/* buffer window starts at the top row */
+      bptr->first_undef_row = 0;	/* no row has been defined yet */
+      bptr->dirty = FALSE;
+    }
+  }
+}
+
+
+LOCAL(void)
+do_sarray_io (j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing)
+/* Transfer a virtual sample array's buffer to (writing) or from its file */
+{
+  long rowbytes, offset, nbytes, nrows, bound, filerow, i;
+
+  rowbytes = (long) ptr->samplesperrow * SIZEOF(JSAMPLE);
+  offset = ptr->cur_start_row * rowbytes;
+  /* mem_buffer is built from allocation chunks: one transfer per chunk */
+  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
+    filerow = (long) ptr->cur_start_row + i;	/* array row at chunk start */
+    /* Begin with a whole chunk, then clip the row count three ways */
+    nrows = (long) ptr->rowsperchunk;
+    bound = (long) ptr->rows_in_mem - i;	/* short chunk at end of buffer */
+    if (bound < nrows) nrows = bound;
+    bound = (long) ptr->first_undef_row - filerow; /* only defined rows */
+    if (bound < nrows) nrows = bound;
+    bound = (long) ptr->rows_in_array - filerow; /* only what fits in file */
+    if (bound < nrows) nrows = bound;
+    if (nrows <= 0)		/* this chunk might be past end of file! */
+      break;
+    nbytes = nrows * rowbytes;
+    if (writing)
+      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
+	(void FAR *) ptr->mem_buffer[i], offset, nbytes);
+    else
+      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
+	(void FAR *) ptr->mem_buffer[i], offset, nbytes);
+    offset += nbytes;
+  }
+}
+
+
+LOCAL(void)
+do_barray_io (j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing)
+/* Transfer a virtual block array's buffer to (writing) or from its file */
+{
+  long rowbytes, offset, nbytes, nrows, bound, filerow, i;
+
+  rowbytes = (long) ptr->blocksperrow * SIZEOF(JBLOCK);
+  offset = ptr->cur_start_row * rowbytes;
+  /* mem_buffer is built from allocation chunks: one transfer per chunk */
+  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
+    filerow = (long) ptr->cur_start_row + i;	/* array row at chunk start */
+    /* Begin with a whole chunk, then clip the row count three ways */
+    nrows = (long) ptr->rowsperchunk;
+    bound = (long) ptr->rows_in_mem - i;	/* short chunk at end of buffer */
+    if (bound < nrows) nrows = bound;
+    bound = (long) ptr->first_undef_row - filerow; /* only defined rows */
+    if (bound < nrows) nrows = bound;
+    bound = (long) ptr->rows_in_array - filerow; /* only what fits in file */
+    if (bound < nrows) nrows = bound;
+    if (nrows <= 0)		/* this chunk might be past end of file! */
+      break;
+    nbytes = nrows * rowbytes;
+    if (writing)
+      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
+	(void FAR *) ptr->mem_buffer[i], offset, nbytes);
+    else
+      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
+	(void FAR *) ptr->mem_buffer[i], offset, nbytes);
+    offset += nbytes;
+  }
+}
+
+
+METHODDEF(JSAMPARRAY)
+access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr,
+		    JDIMENSION start_row, JDIMENSION num_rows,
+		    boolean writable)
+/* Access the part of a virtual sample array starting at start_row */
+/* and extending for num_rows rows.  writable is true if  */
+/* caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;	/* unsigned sum: may wrap */
+  JDIMENSION undef_row;
+
+  /* debugging check; end_row < start_row means the sum above wrapped */
+  if (end_row < start_row || end_row > ptr->rows_in_array ||
+      num_rows > ptr->maxaccess || ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row+ptr->rows_in_mem) {
+    if (! ptr->b_s_open)	/* whole array in memory: can't be outside it */
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_sarray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long) end_row - (long) ptr->rows_in_mem;
+      if (ltemp < 0)
+	ltemp = 0;		/* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION) ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_sarray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)		/* writer skipped over a section of array */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;	/* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)		/* rows up to end_row count as defined now */
+      ptr->first_undef_row = end_row;
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t) ptr->samplesperrow * SIZEOF(JSAMPLE);
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+	FMEMZERO((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+	undef_row++;
+      }
+    } else {
+      if (! writable)		/* reader looking at undefined data */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return address of proper part of the buffer */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+METHODDEF(JBLOCKARRAY)
+access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr,
+		    JDIMENSION start_row, JDIMENSION num_rows,
+		    boolean writable)
+/* Make num_rows rows of a virtual block array, beginning at start_row, */
+/* accessible in memory and return a pointer to the first of them. */
+/* writable is true if caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;	/* one past the last row wanted */
+  JDIMENSION undef_row;		/* first accessed row that is still undefined */
+
+  /* debugging check: row range, access size, and buffer must all be valid */
+  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
+      ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row+ptr->rows_in_mem) {
+    if (! ptr->b_s_open)	/* window must move, but backing store isn't open */
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_barray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long) end_row - (long) ptr->rows_in_mem;
+      if (ltemp < 0)
+	ltemp = 0;		/* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION) ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_barray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)		/* writer skipped over a section of array */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;	/* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)
+      ptr->first_undef_row = end_row;	/* rows below end_row now count as defined */
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t) ptr->blocksperrow * SIZEOF(JBLOCK);
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+	FMEMZERO((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+	undef_row++;
+      }
+    } else {
+      if (! writable)		/* reader looking at undefined data */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return address of start_row's entry within the in-memory buffer */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+/*
+ * Release a pool: close IMAGE backing store, then free large & small chunks.
+ */
+
+METHODDEF(void)
+free_pool (j_common_ptr cinfo, int pool_id)
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  large_pool_ptr large_hdr, large_next;
+  small_pool_ptr small_hdr, small_next;
+  size_t nbytes;		/* total size of the chunk being released */
+
+  if (! (pool_id >= 0 && pool_id < JPOOL_NUMPOOLS))
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* reject unknown pool ids */
+
+#ifdef MEM_STATS
+  if (cinfo->err->trace_level > 1)
+    print_mem_stats(cinfo, pool_id); /* report this pool's memory usage */
+#endif
+
+  /* When the IMAGE pool goes away, close virtual-array backing store first */
+  if (pool_id == JPOOL_IMAGE) {
+    jvirt_sarray_ptr sarr;
+    jvirt_barray_ptr barr;
+
+    for (sarr = mem->virt_sarray_list; sarr != NULL; sarr = sarr->next) {
+      if (! sarr->b_s_open)
+	continue;		/* no backing store is open for this array */
+      sarr->b_s_open = FALSE;	/* clear first to prevent recursive close */
+      (*sarr->b_s_info.close_backing_store) (cinfo, & sarr->b_s_info);
+    }
+    mem->virt_sarray_list = NULL;
+    for (barr = mem->virt_barray_list; barr != NULL; barr = barr->next) {
+      if (! barr->b_s_open)
+	continue;		/* no backing store is open for this array */
+      barr->b_s_open = FALSE;	/* clear first to prevent recursive close */
+      (*barr->b_s_info.close_backing_store) (cinfo, & barr->b_s_info);
+    }
+    mem->virt_barray_list = NULL;
+  }
+
+  /* Release large objects: detach the chain, then free chunk by chunk */
+  large_hdr = mem->large_list[pool_id];
+  mem->large_list[pool_id] = NULL;
+
+  for (; large_hdr != NULL; large_hdr = large_next) {
+    large_next = large_hdr->hdr.next;	/* save link before freeing */
+    /* chunk size = used part + unused part + header */
+    nbytes = large_hdr->hdr.bytes_used +
+	     large_hdr->hdr.bytes_left +
+	     SIZEOF(large_pool_hdr);
+    jpeg_free_large(cinfo, (void FAR *) large_hdr, nbytes);
+    mem->total_space_allocated -= nbytes;
+  }
+
+  /* Release small objects the same way */
+  small_hdr = mem->small_list[pool_id];
+  mem->small_list[pool_id] = NULL;
+
+  for (; small_hdr != NULL; small_hdr = small_next) {
+    small_next = small_hdr->hdr.next;	/* save link before freeing */
+    /* chunk size = used part + unused part + header */
+    nbytes = small_hdr->hdr.bytes_used +
+	     small_hdr->hdr.bytes_left +
+	     SIZEOF(small_pool_hdr);
+    jpeg_free_small(cinfo, (void *) small_hdr, nbytes);
+    mem->total_space_allocated -= nbytes;
+  }
+}
+
+
+/*
+ * Tear down the whole memory manager: every pool, then the manager itself.
+ * This must only be called while cinfo->mem is non-NULL.
+ */
+
+METHODDEF(void)
+self_destruct (j_common_ptr cinfo)
+{
+  int pool = JPOOL_NUMPOOLS;
+
+  /* Close all backing store and release all memory, highest-numbered
+   * pool first.  Releasing pools in reverse order might help avoid
+   * fragmentation with some (brain-damaged) malloc libraries.
+   */
+  while (--pool >= JPOOL_PERMANENT) {
+    free_pool(cinfo, pool);
+  }
+
+  /* Finally give back the memory manager control block itself. */
+  jpeg_free_small(cinfo, (void *) cinfo->mem, SIZEOF(my_memory_mgr));
+  cinfo->mem = NULL;		/* guarantees this runs at most once */
+
+  jpeg_mem_term(cinfo);		/* let the system-dependent layer clean up */
+}
+
+
+/*
+ * Memory manager initialization: build the manager and publish it in
+ * cinfo->mem.  On entry, only the error manager pointer is valid in cinfo!
+ */
+
+GLOBAL(void)
+jinit_memory_mgr (j_common_ptr cinfo)
+{
+  my_mem_ptr mem;
+  long max_to_use;
+  int pool;
+  size_t test_mac;
+
+  cinfo->mem = NULL;		/* for safety if init fails */
+
+  /* Check for configuration errors.
+   * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably
+   * doesn't reflect any real hardware alignment requirement.
+   * The test is a little tricky: for X>0, X and X-1 have no one-bits
+   * in common if and only if X is a power of 2, i.e., has only one one-bit.
+   * Some compilers may give an "unreachable code" warning here; ignore it.
+   */
+  if ((SIZEOF(ALIGN_TYPE) & (SIZEOF(ALIGN_TYPE)-1)) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE);
+  /* MAX_ALLOC_CHUNK must be representable as type size_t, and must be
+   * a multiple of SIZEOF(ALIGN_TYPE).
+   * Again, an "unreachable code" warning may be ignored here.
+   * But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK.
+   */
+  test_mac = (size_t) MAX_ALLOC_CHUNK;
+  if ((long) test_mac != MAX_ALLOC_CHUNK ||
+      (MAX_ALLOC_CHUNK % SIZEOF(ALIGN_TYPE)) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
+
+  max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */
+
+  /* Attempt to allocate memory manager's control block */
+  mem = (my_mem_ptr) jpeg_get_small(cinfo, SIZEOF(my_memory_mgr));
+
+  if (mem == NULL) {
+    jpeg_mem_term(cinfo);	/* system-dependent cleanup */
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
+  }
+
+  /* OK, fill in the method pointers */
+  mem->pub.alloc_small = alloc_small;
+  mem->pub.alloc_large = alloc_large;
+  mem->pub.alloc_sarray = alloc_sarray;
+  mem->pub.alloc_barray = alloc_barray;
+  mem->pub.request_virt_sarray = request_virt_sarray;
+  mem->pub.request_virt_barray = request_virt_barray;
+  mem->pub.realize_virt_arrays = realize_virt_arrays;
+  mem->pub.access_virt_sarray = access_virt_sarray;
+  mem->pub.access_virt_barray = access_virt_barray;
+  mem->pub.free_pool = free_pool;
+  mem->pub.self_destruct = self_destruct;
+
+  /* Make MAX_ALLOC_CHUNK accessible to other modules */
+  mem->pub.max_alloc_chunk = MAX_ALLOC_CHUNK;
+
+  /* Initialize working state */
+  mem->pub.max_memory_to_use = max_to_use;
+  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
+    mem->small_list[pool] = NULL;
+    mem->large_list[pool] = NULL;
+  }
+  mem->virt_sarray_list = NULL;
+  mem->virt_barray_list = NULL;
+  mem->total_space_allocated = SIZEOF(my_memory_mgr);
+
+  /* Declare ourselves open for business */
+  cinfo->mem = & mem->pub;
+
+  /* Check for an environment variable JPEGMEM; if found, override the
+   * default max_memory setting from jpeg_mem_init.  The number is scaled
+   * by 1000, or by 1000000 when followed by 'm' or 'M'.  Note that the
+   * surrounding application may again override this value.
+   * If your system doesn't support getenv(), define NO_GETENV to disable it.
+   */
+#ifndef NO_GETENV
+  { char * memenv;
+
+    if ((memenv = getenv("JPEGMEM")) != NULL) {
+      char ch = 'x';
+
+      if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
+	/* clamp before scaling so an oversized setting can't overflow a long */
+	long scale = (ch == 'm' || ch == 'M') ? 1000000L : 1000L;
+	long limit = (long) ((~0UL) >> 1) / scale;
+	if (max_to_use > limit) max_to_use = limit;
+	if (max_to_use < -limit) max_to_use = -limit;
+	mem->pub.max_memory_to_use = max_to_use * scale;
+      }
+    }
+  }
+#endif
+}
diff --git a/libraries/jpeg/jmemsys.h b/libraries/jpeg/jmemsys.h
new file mode 100644
index 000000000..6c3c6d348
--- /dev/null
+++ b/libraries/jpeg/jmemsys.h
@@ -0,0 +1,198 @@
+/*
+ * jmemsys.h
+ *
+ * Copyright (C) 1992-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This include file defines the interface between the system-independent
+ * and system-dependent portions of the JPEG memory manager.  No other
+ * modules need include it.  (The system-independent portion is jmemmgr.c;
+ * there are several different versions of the system-dependent portion.)
+ *
+ * This file works as-is for the system-dependent memory managers supplied
+ * in the IJG distribution.  You may need to modify it if you write a
+ * custom memory manager.  If system-dependent changes are needed in
+ * this file, the best method is to #ifdef them based on a configuration
+ * symbol supplied in jconfig.h, as we have done with USE_MSDOS_MEMMGR
+ * and USE_MAC_MEMMGR.
+ */
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_get_small		jGetSmall
+#define jpeg_free_small		jFreeSmall
+#define jpeg_get_large		jGetLarge
+#define jpeg_free_large		jFreeLarge
+#define jpeg_mem_available	jMemAvail
+#define jpeg_open_backing_store	jOpenBackStore
+#define jpeg_mem_init		jMemInit
+#define jpeg_mem_term		jMemTerm
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/*
+ * These two functions are used to allocate and release small chunks of
+ * memory.  (Typically the total amount requested through jpeg_get_small is
+ * no more than 20K or so; this will be requested in chunks of a few K each.)
+ * Behavior should be the same as for the standard library functions malloc
+ * and free; in particular, jpeg_get_small must return NULL on failure.
+ * On most systems, these ARE malloc and free.  jpeg_free_small is passed the
+ * size of the object being freed, just in case it's needed.
+ * On an 80x86 machine using small-data memory model, these manage near heap.
+ */
+
+EXTERN(void *) jpeg_get_small JPP((j_common_ptr cinfo, size_t sizeofobject));
+EXTERN(void) jpeg_free_small JPP((j_common_ptr cinfo, void * object,
+				  size_t sizeofobject));
+
+/*
+ * These two functions are used to allocate and release large chunks of
+ * memory (up to the total free space designated by jpeg_mem_available).
+ * The interface is the same as above, except that on an 80x86 machine,
+ * far pointers are used.  On most other machines these are identical to
+ * the jpeg_get/free_small routines; but we keep them separate anyway,
+ * in case a different allocation strategy is desirable for large chunks.
+ */
+
+EXTERN(void FAR *) jpeg_get_large JPP((j_common_ptr cinfo,
+				       size_t sizeofobject));
+EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object,
+				  size_t sizeofobject));
+
+/*
+ * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may
+ * be requested in a single call to jpeg_get_large (and jpeg_get_small for that
+ * matter, but that case should never come into play).  This macro is needed
+ * to model the 64Kb-segment-size limit of far addressing on 80x86 machines.
+ * On those machines, we expect that jconfig.h will provide a proper value.
+ * On machines with 32-bit flat address spaces, any large constant may be used.
+ *
+ * NB: jmemmgr.c expects that MAX_ALLOC_CHUNK will be representable as type
+ * size_t and will be a multiple of SIZEOF(ALIGN_TYPE).
+ */
+
+#ifndef MAX_ALLOC_CHUNK		/* may be overridden in jconfig.h */
+#define MAX_ALLOC_CHUNK  1000000000L
+#endif
+
+/*
+ * This routine computes the total space still available for allocation by
+ * jpeg_get_large.  If more space than this is needed, backing store will be
+ * used.  NOTE: any memory already allocated must not be counted.
+ *
+ * There is a minimum space requirement, corresponding to the minimum
+ * feasible buffer sizes; jmemmgr.c will request that much space even if
+ * jpeg_mem_available returns zero.  The maximum space needed, enough to hold
+ * all working storage in memory, is also passed in case it is useful.
+ * Finally, the total space already allocated is passed.  If no better
+ * method is available, cinfo->mem->max_memory_to_use - already_allocated
+ * is often a suitable calculation.
+ *
+ * It is OK for jpeg_mem_available to underestimate the space available
+ * (that'll just lead to more backing-store access than is really necessary).
+ * However, an overestimate will lead to failure.  Hence it's wise to subtract
+ * a slop factor from the true available space.  5% should be enough.
+ *
+ * On machines with lots of virtual memory, any large constant may be returned.
+ * Conversely, zero may be returned to always use the minimum amount of memory.
+ */
+
+EXTERN(long) jpeg_mem_available JPP((j_common_ptr cinfo,
+				     long min_bytes_needed,
+				     long max_bytes_needed,
+				     long already_allocated));
+
+
+/*
+ * This structure holds whatever state is needed to access a single
+ * backing-store object.  The read/write/close method pointers are called
+ * by jmemmgr.c to manipulate the backing-store object; all other fields
+ * are private to the system-dependent backing store routines.
+ */
+
+#define TEMP_NAME_LENGTH   64	/* max length of a temporary file's name */
+
+
+#ifdef USE_MSDOS_MEMMGR		/* DOS-specific junk */
+
+typedef unsigned short XMSH;	/* type of extended-memory handles */
+typedef unsigned short EMSH;	/* type of expanded-memory handles */
+
+typedef union {
+  short file_handle;		/* DOS file handle if it's a temp file */
+  XMSH xms_handle;		/* handle if it's a chunk of XMS */
+  EMSH ems_handle;		/* handle if it's a chunk of EMS */
+} handle_union;
+
+#endif /* USE_MSDOS_MEMMGR */
+
+#ifdef USE_MAC_MEMMGR		/* Mac-specific junk */
+#include <Files.h>
+#endif /* USE_MAC_MEMMGR */
+
+
+typedef struct backing_store_struct * backing_store_ptr;
+
+typedef struct backing_store_struct {
+  /* Methods for this object; jpeg_open_backing_store must fill them in */
+  JMETHOD(void, read_backing_store, (j_common_ptr cinfo,
+				     backing_store_ptr info,
+				     void FAR * buffer_address,
+				     long file_offset, long byte_count));
+  JMETHOD(void, write_backing_store, (j_common_ptr cinfo,
+				      backing_store_ptr info,
+				      void FAR * buffer_address,
+				      long file_offset, long byte_count));
+  JMETHOD(void, close_backing_store, (j_common_ptr cinfo,
+				      backing_store_ptr info));
+
+  /* Private fields for system-dependent backing-store management; */
+#ifdef USE_MSDOS_MEMMGR
+  /* For the MS-DOS manager (jmemdos.c), we need: */
+  handle_union handle;		/* reference to backing-store storage object */
+  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
+#else
+#ifdef USE_MAC_MEMMGR
+  /* For the Mac manager (jmemmac.c), we need: */
+  short temp_file;		/* file reference number to temp file */
+  FSSpec tempSpec;		/* the FSSpec for the temp file */
+  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
+#else
+  /* For a typical implementation with temp files, we need: */
+  FILE * temp_file;		/* stdio reference to temp file */
+  char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
+#endif /* USE_MAC_MEMMGR */
+#endif /* USE_MSDOS_MEMMGR */
+} backing_store_info;
+
+
+/*
+ * Initial opening of a backing-store object.  This must fill in the
+ * read/write/close pointers in the object.  The read/write routines
+ * may take an error exit if the specified maximum file size is exceeded.
+ * (If jpeg_mem_available always returns a large value, this routine can
+ * just take an error exit.)
+ */
+
+EXTERN(void) jpeg_open_backing_store JPP((j_common_ptr cinfo,
+					  backing_store_ptr info,
+					  long total_bytes_needed));
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.  jpeg_mem_init will be called before anything is
+ * allocated (and, therefore, nothing in cinfo is of use except the error
+ * manager pointer).  It should return a suitable default value for
+ * max_memory_to_use; this may subsequently be overridden by the surrounding
+ * application.  (Note that max_memory_to_use is only important if
+ * jpeg_mem_available chooses to consult it ... no one else will.)
+ * jpeg_mem_term may assume that all requested memory has been freed and that
+ * all opened backing-store objects have been closed.
+ */
+
+EXTERN(long) jpeg_mem_init JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_mem_term JPP((j_common_ptr cinfo));
diff --git a/libraries/jpeg/jmorecfg.h b/libraries/jpeg/jmorecfg.h
new file mode 100644
index 000000000..679d68bdc
--- /dev/null
+++ b/libraries/jpeg/jmorecfg.h
@@ -0,0 +1,446 @@
+/*
+ * jmorecfg.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 1997-2013 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains additional configuration options that customize the
+ * JPEG software for special applications or support machine-dependent
+ * optimizations.  Most users will not need to touch this file.
+ */
+
+
+/*
+ * Define BITS_IN_JSAMPLE as either
+ *   8   for 8-bit sample values (the usual setting)
+ *   9   for 9-bit sample values
+ *   10  for 10-bit sample values
+ *   11  for 11-bit sample values
+ *   12  for 12-bit sample values
+ * Only 8-, 9-, 10-, 11-, and 12-bit sample data precision is supported for
+ * full-feature DCT processing.  Further depths up to 16 bits may be added
+ * later for the lossless modes of operation.
+ * Run-time selection and conversion of data precision will be added later
+ * and are currently not supported, sorry.
+ * Exception:  The transcoding part (jpegtran) supports all settings in a
+ * single instance, since it operates on the level of DCT coefficients and
+ * not sample values.  The DCT coefficients are of the same type (16 bits)
+ * in all cases (see below).
+ */
+
+#define BITS_IN_JSAMPLE  8	/* use 8, 9, 10, 11, or 12 */
+
+
+/*
+ * Maximum number of components (color channels) allowed in JPEG image.
+ * To meet the letter of the JPEG spec, set this to 255.  However, darn
+ * few applications need more than 4 channels (maybe 5 for CMYK + alpha
+ * mask).  We recommend 10 as a reasonable compromise; use 4 if you are
+ * really short on memory.  (Each allowed component costs a hundred or so
+ * bytes of storage, whether actually used in an image or not.)
+ */
+
+#define MAX_COMPONENTS  10	/* maximum number of image components */
+
+
+/*
+ * Basic data types.
+ * You may need to change these if you have a machine with unusual data
+ * type sizes; for example, "char" not 8 bits, "short" not 16 bits,
+ * or "long" not 32 bits.  We don't care whether "int" is 16 or 32 bits,
+ * but it had better be at least 16.
+ */
+
+/* Representation of a single sample (pixel element value).
+ * We frequently allocate large arrays of these, so it's important to keep
+ * them small.  But if you have memory to burn and access to char or short
+ * arrays is very slow on your hardware, you might want to change these.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+/* JSAMPLE should be the smallest type that will hold the values 0..255.
+ * You can use a signed char by having GETJSAMPLE mask it with 0xFF.
+ */
+
+#ifdef HAVE_UNSIGNED_CHAR
+
+typedef unsigned char JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#else /* not HAVE_UNSIGNED_CHAR */
+
+typedef char JSAMPLE;
+#ifdef CHAR_IS_UNSIGNED
+#define GETJSAMPLE(value)  ((int) (value))
+#else
+#define GETJSAMPLE(value)  ((int) (value) & 0xFF)
+#endif /* CHAR_IS_UNSIGNED */
+
+#endif /* HAVE_UNSIGNED_CHAR */
+
+#define MAXJSAMPLE	255
+#define CENTERJSAMPLE	128
+
+#endif /* BITS_IN_JSAMPLE == 8 */
+
+
+#if BITS_IN_JSAMPLE == 9
+/* JSAMPLE should be the smallest type that will hold the values 0..511.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	511
+#define CENTERJSAMPLE	256
+
+#endif /* BITS_IN_JSAMPLE == 9 */
+
+
+#if BITS_IN_JSAMPLE == 10
+/* JSAMPLE should be the smallest type that will hold the values 0..1023.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	1023
+#define CENTERJSAMPLE	512
+
+#endif /* BITS_IN_JSAMPLE == 10 */
+
+
+#if BITS_IN_JSAMPLE == 11
+/* JSAMPLE should be the smallest type that will hold the values 0..2047.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	2047
+#define CENTERJSAMPLE	1024
+
+#endif /* BITS_IN_JSAMPLE == 11 */
+
+
+#if BITS_IN_JSAMPLE == 12
+/* JSAMPLE should be the smallest type that will hold the values 0..4095.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	4095
+#define CENTERJSAMPLE	2048
+
+#endif /* BITS_IN_JSAMPLE == 12 */
+
+
+/* Representation of a DCT frequency coefficient.
+ * This should be a signed value of at least 16 bits; "short" is usually OK.
+ * Again, we allocate large arrays of these, but you can change to int
+ * if you have memory to burn and "short" is really slow.
+ */
+
+typedef short JCOEF;
+
+
+/* Compressed datastreams are represented as arrays of JOCTET.
+ * These must be EXACTLY 8 bits wide, at least once they are written to
+ * external storage.  Note that when using the stdio data source/destination
+ * managers, this is also the data type passed to fread/fwrite.
+ */
+
+#ifdef HAVE_UNSIGNED_CHAR
+
+typedef unsigned char JOCTET;
+#define GETJOCTET(value)  (value)
+
+#else /* not HAVE_UNSIGNED_CHAR */
+
+typedef char JOCTET;
+#ifdef CHAR_IS_UNSIGNED
+#define GETJOCTET(value)  (value)
+#else
+#define GETJOCTET(value)  ((value) & 0xFF)
+#endif /* CHAR_IS_UNSIGNED */
+
+#endif /* HAVE_UNSIGNED_CHAR */
+
+
+/* These typedefs are used for various table entries and so forth.
+ * They must be at least as wide as specified; but making them too big
+ * won't cost a huge amount of memory, so we don't provide special
+ * extraction code like we did for JSAMPLE.  (In other words, these
+ * typedefs live at a different point on the speed/space tradeoff curve.)
+ */
+
+/* UINT8 must hold at least the values 0..255. */
+
+#ifdef HAVE_UNSIGNED_CHAR
+typedef unsigned char UINT8;
+#else /* not HAVE_UNSIGNED_CHAR */
+#ifdef CHAR_IS_UNSIGNED
+typedef char UINT8;
+#else /* not CHAR_IS_UNSIGNED */
+typedef short UINT8;
+#endif /* CHAR_IS_UNSIGNED */
+#endif /* HAVE_UNSIGNED_CHAR */
+
+/* UINT16 must hold at least the values 0..65535. */
+
+#ifdef HAVE_UNSIGNED_SHORT
+typedef unsigned short UINT16;
+#else /* not HAVE_UNSIGNED_SHORT */
+typedef unsigned int UINT16;
+#endif /* HAVE_UNSIGNED_SHORT */
+
+/* INT16 must hold at least the values -32768..32767. */
+
+#ifndef XMD_H			/* X11/xmd.h correctly defines INT16 */
+typedef short INT16;
+#endif
+
+/* INT32 must hold at least signed 32-bit values. */
+
+#ifndef XMD_H			/* X11/xmd.h correctly defines INT32 */
+#ifndef _BASETSD_H_		/* Microsoft defines it in basetsd.h */
+#ifndef _BASETSD_H		/* MinGW is slightly different */
+#ifndef QGLOBAL_H		/* Qt defines it in qglobal.h */
+typedef long INT32;
+#endif
+#endif
+#endif
+#endif
+
+/* Datatype used for image dimensions.  The JPEG standard only supports
+ * images up to 64K*64K due to 16-bit fields in SOF markers.  Therefore
+ * "unsigned int" is sufficient on all machines.  However, if you need to
+ * handle larger images and you don't mind deviating from the spec, you
+ * can change this datatype.
+ */
+
+typedef unsigned int JDIMENSION;
+
+#define JPEG_MAX_DIMENSION  65500L  /* a tad under 64K to prevent overflows */
+
+
+/* These macros are used in all function definitions and extern declarations.
+ * You could modify them if you need to change function linkage conventions;
+ * in particular, you'll need to do that to make the library a Windows DLL.
+ * Another application is to make all functions global for use with debuggers
+ * or code profilers that require it.
+ */
+
+/* a function called through method pointers: */
+#define METHODDEF(type)		static type
+/* a function used only in its module: */
+#define LOCAL(type)		static type
+/* a function referenced thru EXTERNs: */
+#define GLOBAL(type)		type
+/* a reference to a GLOBAL function: */
+#define EXTERN(type)		extern type
+
+
+/* This macro is used to declare a "method", that is, a function pointer.
+ * We want to supply prototype parameters if the compiler can cope.
+ * Note that the arglist parameter must be parenthesized!
+ * Again, you can customize this if you need special linkage keywords.
+ */
+
+#ifdef HAVE_PROTOTYPES
+#define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
+#else
+#define JMETHOD(type,methodname,arglist)  type (*methodname) ()
+#endif
+
+
+/* The noreturn type identifier is used to declare functions
+ * which cannot return.
+ * Compilers can thus create more optimized code and perform
+ * better checks for warnings and errors.
+ * Static analyzer tools can make improved inferences about
+ * execution paths and are prevented from giving false alerts.
+ *
+ * Unfortunately, the proposed specifications of corresponding
+ * extensions in the Dec 2011 ISO C standard revision (C11),
+ * GCC, MSVC, etc. are not viable.
+ * Thus we introduce a user defined type to declare noreturn
+ * functions at least for clarity.  A proper compiler would
+ * have a suitable noreturn type to match in place of void.
+ */
+
+#ifndef HAVE_NORETURN_T
+typedef void noreturn_t;
+#endif
+
+
+/* Here is the pseudo-keyword for declaring pointers that must be "far"
+ * on 80x86 machines.  Most of the specialized coding for 80x86 is handled
+ * by just saying "FAR *" where such a pointer is needed.  In a few places
+ * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
+ */
+
+#ifndef FAR
+#ifdef NEED_FAR_POINTERS
+#define FAR  far
+#else
+#define FAR
+#endif
+#endif
+
+
+/*
+ * On a few systems, type boolean and/or its values FALSE, TRUE may appear
+ * in standard header files.  Or you may have conflicts with application-
+ * specific header files that you want to include together with these files.
+ * Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
+ */
+
+#ifndef HAVE_BOOLEAN
+#if defined FALSE || defined TRUE || defined QGLOBAL_H
+/* Qt3 defines FALSE and TRUE as "const" variables in qglobal.h */
+typedef int boolean;
+#ifndef FALSE			/* in case these macros already exist */
+#define FALSE	0		/* values of boolean */
+#endif
+#ifndef TRUE
+#define TRUE	1
+#endif
+#else
+typedef enum { FALSE = 0, TRUE = 1 } boolean;
+#endif
+#endif
+
+
+/*
+ * The remaining options affect code selection within the JPEG library,
+ * but they don't need to be visible to most applications using the library.
+ * To minimize application namespace pollution, the symbols won't be
+ * defined unless JPEG_INTERNALS or JPEG_INTERNAL_OPTIONS has been defined.
+ */
+
+#ifdef JPEG_INTERNALS
+#define JPEG_INTERNAL_OPTIONS
+#endif
+
+#ifdef JPEG_INTERNAL_OPTIONS
+
+
+/*
+ * These defines indicate whether to include various optional functions.
+ * Undefining some of these symbols will produce a smaller but less capable
+ * library.  Note that you can leave certain source files out of the
+ * compilation/linking process if you've #undef'd the corresponding symbols.
+ * (You may HAVE to do that if your compiler doesn't like null source files.)
+ */
+
+/* Capability options common to encoder and decoder: */
+
+#define DCT_ISLOW_SUPPORTED	/* slow but accurate integer algorithm */
+#define DCT_IFAST_SUPPORTED	/* faster, less accurate integer method */
+#define DCT_FLOAT_SUPPORTED	/* floating-point: accurate, fast on fast HW */
+
+/* Encoder capability options: */
+
+#define C_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
+#define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
+#define DCT_SCALING_SUPPORTED	    /* Input rescaling via DCT? (Requires DCT_ISLOW)*/
+#define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
+/* Note: if you selected more than 8-bit data precision, it is dangerous to
+ * turn off ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only
+ * good for 8-bit precision, so arithmetic coding is recommended for higher
+ * precision.  The Huffman encoder normally uses entropy optimization to
+ * compute usable tables for higher precision.  Otherwise, you'll have to
+ * supply different default Huffman tables.
+ * The exact same statements apply for progressive JPEG: the default tables
+ * don't work for progressive mode.  (This may get fixed, however.)
+ */
+#define INPUT_SMOOTHING_SUPPORTED   /* Input image smoothing option? */
+
+/* Decoder capability options: */
+
+#define D_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
+#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
+#define IDCT_SCALING_SUPPORTED	    /* Output rescaling via IDCT? (Requires DCT_ISLOW)*/
+#define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
+#define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
+#undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
+#define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
+#define QUANT_1PASS_SUPPORTED	    /* 1-pass color quantization? */
+#define QUANT_2PASS_SUPPORTED	    /* 2-pass color quantization? */
+
+/* more capability options later, no doubt */
+
+
+/*
+ * Ordering of RGB data in scanlines passed to or from the application.
+ * If your application wants to deal with data in the order B,G,R, just
+ * change these macros.  You can also deal with formats such as R,G,B,X
+ * (one extra byte per pixel) by changing RGB_PIXELSIZE.  Note that changing
+ * the offsets will also change the order in which colormap data is organized.
+ * RESTRICTIONS:
+ * 1. The sample applications cjpeg,djpeg do NOT support modified RGB formats.
+ * 2. The color quantizer modules will not behave desirably if RGB_PIXELSIZE
+ *    is not 3 (they don't understand about dummy color components!).  So you
+ *    can't use color quantization if you change that value.
+ */
+
+#define RGB_RED		0	/* Offset of Red in an RGB scanline element */
+#define RGB_GREEN	1	/* Offset of Green */
+#define RGB_BLUE	2	/* Offset of Blue */
+#define RGB_PIXELSIZE	3	/* JSAMPLEs per RGB scanline element */
+
+
+/* Definitions for speed-related optimizations. */
+
+
+/* If your compiler supports inline functions, define INLINE
+ * as the inline keyword; otherwise define it as empty.
+ */
+
+#ifndef INLINE
+#ifdef __GNUC__			/* for instance, GNU C knows about inline */
+#define INLINE __inline__
+#endif
+#ifndef INLINE
+#define INLINE			/* default is to define it as empty */
+#endif
+#endif
+
+
+/* On some machines (notably 68000 series) "int" is 32 bits, but multiplying
+ * two 16-bit shorts is faster than multiplying two ints.  Define MULTIPLIER
+ * as short on such a machine.  MULTIPLIER must be at least 16 bits wide.
+ */
+
+#ifndef MULTIPLIER
+#define MULTIPLIER  int		/* type for fastest integer multiply */
+#endif
+
+
+/* FAST_FLOAT should be either float or double, whichever is done faster
+ * by your compiler.  (Note that this type is only used in the floating point
+ * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.)
+ * Typically, float is faster in ANSI C compilers, while double is faster in
+ * pre-ANSI compilers (because they insist on converting to double anyway).
+ * The code below therefore chooses float if we have ANSI-style prototypes.
+ */
+
+#ifndef FAST_FLOAT
+#ifdef HAVE_PROTOTYPES
+#define FAST_FLOAT  float
+#else
+#define FAST_FLOAT  double
+#endif
+#endif
+
+#endif /* JPEG_INTERNAL_OPTIONS */
diff --git a/libraries/jpeg/jpegint.h b/libraries/jpeg/jpegint.h
new file mode 100644
index 000000000..e312e1af9
--- /dev/null
+++ b/libraries/jpeg/jpegint.h
@@ -0,0 +1,439 @@
+/*
+ * jpegint.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 1997-2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file provides common declarations for the various JPEG modules.
+ * These declarations are considered internal to the JPEG library; most
+ * applications using the library shouldn't need to include this file.
+ */
+
+
+/* Declarations for both compression & decompression */
+
+typedef enum {			/* Operating modes for buffer controllers */
+	JBUF_PASS_THRU,		/* Plain stripwise operation */
+	/* Remaining modes require a full-image buffer to have been created */
+	JBUF_SAVE_SOURCE,	/* Run source subobject only, save output */
+	JBUF_CRANK_DEST,	/* Run dest subobject only, using saved data */
+	JBUF_SAVE_AND_PASS	/* Run both subobjects, save output */
+} J_BUF_MODE;			/* passed as pass_mode to the controllers' start_pass methods */
+
+/* Values of global_state field; CSTATE_* = compressor, DSTATE_* = decompressor (jdapi.c has some dependencies on ordering!) */
+#define CSTATE_START	100	/* after create_compress */
+#define CSTATE_SCANNING	101	/* start_compress done, write_scanlines OK */
+#define CSTATE_RAW_OK	102	/* start_compress done, write_raw_data OK */
+#define CSTATE_WRCOEFS	103	/* jpeg_write_coefficients done */
+#define DSTATE_START	200	/* after create_decompress */
+#define DSTATE_INHEADER	201	/* reading header markers, no SOS yet */
+#define DSTATE_READY	202	/* found SOS, ready for start_decompress */
+#define DSTATE_PRELOAD	203	/* reading multiscan file in start_decompress */
+#define DSTATE_PRESCAN	204	/* performing dummy pass for 2-pass quant */
+#define DSTATE_SCANNING	205	/* start_decompress done, read_scanlines OK */
+#define DSTATE_RAW_OK	206	/* start_decompress done, read_raw_data OK */
+#define DSTATE_BUFIMAGE	207	/* expecting jpeg_start_output */
+#define DSTATE_BUFPOST	208	/* looking for SOS/EOI in jpeg_finish_output */
+#define DSTATE_RDCOEFS	209	/* reading file in jpeg_read_coefficients */
+#define DSTATE_STOPPING	210	/* looking for EOI in jpeg_finish_decompress */
+
+
+/* Declarations for compression modules */
+
+/* Master control module (compression): three per-pass method pointers plus two state flags */
+struct jpeg_comp_master {
+  JMETHOD(void, prepare_for_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, pass_startup, (j_compress_ptr cinfo));
+  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean call_pass_startup;	/* True if the pass_startup method must be called */
+  boolean is_last_pass;		/* True during last pass */
+};
+
+/* Main buffer control (downsampled-data buffer), compression side; start_pass receives the pass's J_BUF_MODE */
+struct jpeg_c_main_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, process_data, (j_compress_ptr cinfo,
+			       JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			       JDIMENSION in_rows_avail));
+};
+
+/* Compression preprocessing (downsampling input buffer control); input rows and output row groups each have their own counter argument */
+struct jpeg_c_prep_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, pre_process_data, (j_compress_ptr cinfo,
+				   JSAMPARRAY input_buf,
+				   JDIMENSION *in_row_ctr,
+				   JDIMENSION in_rows_avail,
+				   JSAMPIMAGE output_buf,
+				   JDIMENSION *out_row_group_ctr,
+				   JDIMENSION out_row_groups_avail));
+};
+
+/* Coefficient buffer control (compression side); the meaning of compress_data's boolean result is not stated in this header */
+struct jpeg_c_coef_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(boolean, compress_data, (j_compress_ptr cinfo,
+				   JSAMPIMAGE input_buf));
+};
+
+/* Colorspace conversion (compression side: methods take a j_compress_ptr) */
+struct jpeg_color_converter {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, color_convert, (j_compress_ptr cinfo,
+				JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+				JDIMENSION output_row, int num_rows));
+};
+
+/* Downsampling (compression side: methods take a j_compress_ptr) */
+struct jpeg_downsampler {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, downsample, (j_compress_ptr cinfo,
+			     JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+			     JSAMPIMAGE output_buf,
+			     JDIMENSION out_row_group_index));
+
+  boolean need_context_rows;	/* TRUE if rows above & below are needed */
+};
+
+/* Forward DCT (also controls coefficient quantization): method-pointer type stored per component in struct jpeg_forward_dct */
+typedef JMETHOD(void, forward_DCT_ptr,
+		(j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+		 JDIMENSION start_row, JDIMENSION start_col,
+		 JDIMENSION num_blocks));
+
+struct jpeg_forward_dct {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  /* Each component may have its own FDCT method: MAX_COMPONENTS slots. */
+  forward_DCT_ptr forward_DCT[MAX_COMPONENTS];
+};
+
+/* Entropy encoding; start_pass receives a gather_statistics flag for the pass */
+struct jpeg_entropy_encoder {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, boolean gather_statistics));
+  JMETHOD(boolean, encode_mcu, (j_compress_ptr cinfo, JBLOCKROW *MCU_data));
+  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+};
+
+/* Marker writing (compression side) */
+struct jpeg_marker_writer {
+  JMETHOD(void, write_file_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_frame_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_scan_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_file_trailer, (j_compress_ptr cinfo));
+  JMETHOD(void, write_tables_only, (j_compress_ptr cinfo));
+  /* The two routines below are exported to allow insertion of extra markers. */
+  /* Probably only COM and APPn markers should be written this way. */
+  JMETHOD(void, write_marker_header, (j_compress_ptr cinfo, int marker,
+				      unsigned int datalen));
+  JMETHOD(void, write_marker_byte, (j_compress_ptr cinfo, int val));
+};
+
+
+/* Declarations for decompression modules */
+
+/* Master control module (decompression): two output-pass method pointers plus one state flag */
+struct jpeg_decomp_master {
+  JMETHOD(void, prepare_for_output_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, finish_output_pass, (j_decompress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean is_dummy_pass;	/* True during 1st pass for 2-pass quant */
+};
+
+/* Input control module (decompression): four method pointers plus two state flags */
+struct jpeg_input_controller {
+  JMETHOD(int, consume_input, (j_decompress_ptr cinfo));
+  JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo));
+  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean has_multiple_scans;	/* True if file has multiple scans */
+  boolean eoi_reached;		/* True when EOI has been consumed */
+};
+
+/* Main buffer control (downsampled-data buffer), decompression side; start_pass receives the pass's J_BUF_MODE */
+struct jpeg_d_main_controller {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, process_data, (j_decompress_ptr cinfo,
+			       JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			       JDIMENSION out_rows_avail));
+};
+
+/* Coefficient buffer control (decompression side) */
+struct jpeg_d_coef_controller {
+  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
+  JMETHOD(int, consume_data, (j_decompress_ptr cinfo));
+  JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo));
+  JMETHOD(int, decompress_data, (j_decompress_ptr cinfo,
+				 JSAMPIMAGE output_buf));
+  /* Pointer to array of coefficient virtual arrays (jvirt_barray_ptr), or NULL if none */
+  jvirt_barray_ptr *coef_arrays;
+};
+
+/* Decompression postprocessing (color quantization buffer control); input row groups and output rows each have their own counter argument */
+struct jpeg_d_post_controller {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, post_process_data, (j_decompress_ptr cinfo,
+				    JSAMPIMAGE input_buf,
+				    JDIMENSION *in_row_group_ctr,
+				    JDIMENSION in_row_groups_avail,
+				    JSAMPARRAY output_buf,
+				    JDIMENSION *out_row_ctr,
+				    JDIMENSION out_rows_avail));
+};
+
+/* Marker reading & parsing (decompression side) */
+struct jpeg_marker_reader {
+  JMETHOD(void, reset_marker_reader, (j_decompress_ptr cinfo));
+  /* Read markers until SOS or EOI.
+   * Returns the same codes that are defined for jpeg_consume_input:
+   * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+   */
+  JMETHOD(int, read_markers, (j_decompress_ptr cinfo));
+  /* Read a restart marker --- exported for use by entropy decoder only */
+  jpeg_marker_parser_method read_restart_marker;
+
+  /* State of marker reader --- nominally internal, but applications
+   * supplying COM or APPn handlers might like to know the state.
+   */
+  boolean saw_SOI;		/* found SOI? */
+  boolean saw_SOF;		/* found SOF? */
+  int next_restart_num;		/* next restart number expected (0-7) */
+  unsigned int discarded_bytes;	/* # of bytes skipped while looking for a marker */
+};
+
+/* Entropy decoding; the meaning of decode_mcu's boolean result is not stated in this header */
+struct jpeg_entropy_decoder {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo, JBLOCKROW *MCU_data));
+  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
+};
+
+/* Inverse DCT (also performs dequantization): method-pointer type stored per component in struct jpeg_inverse_dct */
+typedef JMETHOD(void, inverse_DCT_method_ptr,
+		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col));
+
+struct jpeg_inverse_dct {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  /* Each component may have its own IDCT method: MAX_COMPONENTS slots. */
+  inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS];
+};
+
+/* Upsampling (note that the upsampler must also call the color converter) */
+struct jpeg_upsampler {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, upsample, (j_decompress_ptr cinfo,
+			   JSAMPIMAGE input_buf,
+			   JDIMENSION *in_row_group_ctr,
+			   JDIMENSION in_row_groups_avail,
+			   JSAMPARRAY output_buf,
+			   JDIMENSION *out_row_ctr,
+			   JDIMENSION out_rows_avail));
+
+  boolean need_context_rows;	/* TRUE if rows above & below are needed */
+};
+
+/* Colorspace conversion (decompression side: methods take a j_decompress_ptr) */
+struct jpeg_color_deconverter {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, color_convert, (j_decompress_ptr cinfo,
+				JSAMPIMAGE input_buf, JDIMENSION input_row,
+				JSAMPARRAY output_buf, int num_rows));
+};
+
+/* Color quantization or color precision reduction; start_pass receives an is_pre_scan flag for the pass */
+struct jpeg_color_quantizer {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, boolean is_pre_scan));
+  JMETHOD(void, color_quantize, (j_decompress_ptr cinfo,
+				 JSAMPARRAY input_buf, JSAMPARRAY output_buf,
+				 int num_rows));
+  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, new_color_map, (j_decompress_ptr cinfo));
+};
+
+
+/* Definition of range extension bits for decompression processes.
+ * See the comments with prepare_range_limit_table (in jdmaster.c)
+ * for more info.
+ * The recommended default value for normal applications is 2.
+ * Applications with special requirements may use a different value.
+ * For example, Ghostscript wants to use 3 for proper handling of
+ * wacky images with oversize coefficient values.
+ */
+
+#define RANGE_BITS	2	/* the recommended default for normal applications */
+#define RANGE_CENTER	(CENTERJSAMPLE << RANGE_BITS)	/* CENTERJSAMPLE scaled by 2^RANGE_BITS */
+
+
+/* Miscellaneous useful macros */
+
+#undef MAX			/* drop any earlier definition so ours is the one in effect */
+#define MAX(a,b)	((a) > (b) ? (a) : (b))	/* one argument is evaluated twice: no side effects */
+#undef MIN			/* drop any earlier definition so ours is the one in effect */
+#define MIN(a,b)	((a) < (b) ? (a) : (b))	/* one argument is evaluated twice: no side effects */
+
+
+/* We assume that right shift corresponds to signed division by 2 with
+ * rounding towards minus infinity.  This is correct for typical "arithmetic
+ * shift" instructions that shift in copies of the sign bit.  But some
+ * C compilers implement >> with an unsigned shift.  For these machines you
+ * must define RIGHT_SHIFT_IS_UNSIGNED.
+ * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.
+ * It is only applied with constant shift counts.  SHIFT_TEMPS must be
+ * included in the variables of any routine using RIGHT_SHIFT.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED	/* NOTE(review): the mask below left-shifts ~0 of type INT32 and hard-codes a 32-bit width; if INT32 is signed that shift is undefined in ISO C -- confirm INT32's definition before enabling */
+#define SHIFT_TEMPS	INT32 shift_temp;
+#define RIGHT_SHIFT(x,shft)  \
+	((shift_temp = (x)) < 0 ? \
+	 (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
+	 (shift_temp >> (shft)))
+#else				/* >> already behaves as a signed shift */
+#define SHIFT_TEMPS		/* expands to nothing: no temporary needed */
+#define RIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif /* RIGHT_SHIFT_IS_UNSIGNED */
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES	/* remap each long external name below to a shorter alias */
+#define jinit_compress_master	jICompress
+#define jinit_c_master_control	jICMaster
+#define jinit_c_main_controller	jICMainC
+#define jinit_c_prep_controller	jICPrepC
+#define jinit_c_coef_controller	jICCoefC
+#define jinit_color_converter	jICColor
+#define jinit_downsampler	jIDownsampler
+#define jinit_forward_dct	jIFDCT
+#define jinit_huff_encoder	jIHEncoder
+#define jinit_arith_encoder	jIAEncoder
+#define jinit_marker_writer	jIMWriter
+#define jinit_master_decompress	jIDMaster
+#define jinit_d_main_controller	jIDMainC
+#define jinit_d_coef_controller	jIDCoefC
+#define jinit_d_post_controller	jIDPostC
+#define jinit_input_controller	jIInCtlr
+#define jinit_marker_reader	jIMReader
+#define jinit_huff_decoder	jIHDecoder
+#define jinit_arith_decoder	jIADecoder
+#define jinit_inverse_dct	jIIDCT
+#define jinit_upsampler		jIUpsampler
+#define jinit_color_deconverter	jIDColor
+#define jinit_1pass_quantizer	jI1Quant
+#define jinit_2pass_quantizer	jI2Quant
+#define jinit_merged_upsampler	jIMUpsampler
+#define jinit_memory_mgr	jIMemMgr
+#define jdiv_round_up		jDivRound
+#define jround_up		jRound
+#define jzero_far		jZeroFar
+#define jcopy_sample_rows	jCopySamples
+#define jcopy_block_row		jCopyBlocks
+#define jpeg_zigzag_order	jZIGTable
+#define jpeg_natural_order	jZAGTable
+#define jpeg_natural_order7	jZAG7Table
+#define jpeg_natural_order6	jZAG6Table
+#define jpeg_natural_order5	jZAG5Table
+#define jpeg_natural_order4	jZAG4Table
+#define jpeg_natural_order3	jZAG3Table
+#define jpeg_natural_order2	jZAG2Table
+#define jpeg_aritab		jAriTab
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
+ * and coefficient-block arrays.  This won't work on 80x86 because the arrays
+ * are FAR and we're assuming a small-pointer memory model.  However, some
+ * DOS compilers provide far-pointer versions of memcpy() and memset() even
+ * in the small-model libraries.  These will be used if USE_FMEM is defined.
+ * Otherwise, the routines in jutils.c do it the hard way.
+ */
+
+#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macro */
+#define FMEMZERO(target,size)	MEMZERO(target,size)
+#else				/* 80x86 case */
+#ifdef USE_FMEM			/* the library provides a far-pointer memset */
+#define FMEMZERO(target,size)	_fmemset((void FAR *)(target), 0, (size_t)(size))
+#else				/* no far memset: use the jutils.c routine */
+EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero));
+#define FMEMZERO(target,size)	jzero_far(target, size)
+#endif /* USE_FMEM */
+#endif /* NEED_FAR_POINTERS */
+
+
+/* Compression module initialization routines (each takes the j_compress_ptr being set up) */
+EXTERN(void) jinit_compress_master JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_c_master_control JPP((j_compress_ptr cinfo,
+					 boolean transcode_only));
+EXTERN(void) jinit_c_main_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_c_prep_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_c_coef_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_arith_encoder JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo));
+/* Decompression module initialization routines (each takes the j_decompress_ptr being set up) */
+EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_d_main_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_d_coef_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_arith_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_1pass_quantizer JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_2pass_quantizer JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_merged_upsampler JPP((j_decompress_ptr cinfo));
+/* Memory manager initialization (takes a j_common_ptr rather than a compress- or decompress-specific pointer) */
+EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo));
+
+/* Utility routines in jutils.c (jzero_far is declared with FMEMZERO, only when it is needed) */
+EXTERN(long) jdiv_round_up JPP((long a, long b));
+EXTERN(long) jround_up JPP((long a, long b));
+EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array, int source_row,
+				    JSAMPARRAY output_array, int dest_row,
+				    int num_rows, JDIMENSION num_cols));
+EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row,
+				  JDIMENSION num_blocks));
+/* Constant tables in jutils.c */
+#if 0				/* This table is not actually needed in v6a */
+extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
+#endif /* 0: jpeg_zigzag_order declaration disabled */
+extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
+extern const int jpeg_natural_order7[]; /* zigzag to natural order for 7x7 block */
+extern const int jpeg_natural_order6[]; /* zigzag to natural order for 6x6 block */
+extern const int jpeg_natural_order5[]; /* zigzag to natural order for 5x5 block */
+extern const int jpeg_natural_order4[]; /* zigzag to natural order for 4x4 block */
+extern const int jpeg_natural_order3[]; /* zigzag to natural order for 3x3 block */
+extern const int jpeg_natural_order2[]; /* zigzag to natural order for 2x2 block */
+
+/* Arithmetic coding probability estimation tables in jaricom.c */
+extern const INT32 jpeg_aritab[];
+
+/* Suppress undefined-structure complaints if necessary. */
+
+#ifdef INCOMPLETE_TYPES_BROKEN
+#ifndef AM_MEMORY_MANAGER	/* only jmemmgr.c defines these */
+struct jvirt_sarray_control { long dummy; };	/* placeholder definition */
+struct jvirt_barray_control { long dummy; };	/* placeholder definition */
+#endif /* AM_MEMORY_MANAGER */
+#endif /* INCOMPLETE_TYPES_BROKEN */
diff --git a/libraries/jpeg/jpeglib.h b/libraries/jpeg/jpeglib.h
new file mode 100644
index 000000000..4bd985316
--- /dev/null
+++ b/libraries/jpeg/jpeglib.h
@@ -0,0 +1,1180 @@
+/*
+ * jpeglib.h
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2002-2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file defines the application interface for the JPEG library.
+ * Most applications using the library need only include this file,
+ * and perhaps jerror.h if they want to know the exact error codes.
+ */
+
+#ifndef JPEGLIB_H
+#define JPEGLIB_H
+
+/*
+ * First we include the configuration files that record how this
+ * installation of the JPEG library is set up.  jconfig.h can be
+ * generated automatically for many systems.  jmorecfg.h contains
+ * manual configuration options that most people need not worry about.
+ */
+
+#ifndef JCONFIG_INCLUDED	/* in case jinclude.h already did */
+#include "jconfig.h"		/* widely used configuration options */
+#endif /* JCONFIG_INCLUDED */
+#include "jmorecfg.h"		/* seldom changed options */
+
+
+#ifdef __cplusplus
+#ifndef DONT_USE_EXTERN_C	/* define this to suppress the extern "C" wrapper */
+extern "C" {
+#endif /* DONT_USE_EXTERN_C */
+#endif /* __cplusplus */
+
+/* Version IDs for the JPEG library.
+ * Might be useful for tests like "#if JPEG_LIB_VERSION >= 90".
+ */
+
+#define JPEG_LIB_VERSION        90	/* Compatibility version 9.0 */
+#define JPEG_LIB_VERSION_MAJOR  9	/* major version number */
+#define JPEG_LIB_VERSION_MINOR  3	/* minor version number (not reflected in JPEG_LIB_VERSION) */
+
+
+/* Various constants determining the sizes of things.
+ * All of these are specified by the JPEG standard,
+ * so don't change them if you want to be compatible.
+ */
+
+#define DCTSIZE		    8	/* The basic DCT block is 8x8 coefficients */
+#define DCTSIZE2	    64	/* DCTSIZE squared; # of elements in a block */
+#define NUM_QUANT_TBLS      4	/* Quantization tables are numbered 0..3 */
+#define NUM_HUFF_TBLS       4	/* Huffman tables are numbered 0..3 */
+#define NUM_ARITH_TBLS      16	/* Arith-coding tables are numbered 0..15 */
+#define MAX_COMPS_IN_SCAN   4	/* JPEG limit on # of components in one scan */
+#define MAX_SAMP_FACTOR     4	/* JPEG limit on sampling factors */
+/* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard;
+ * the PostScript DCT filter can emit files with many more than 10 blocks/MCU.
+ * If you happen to run across such a file, you can raise D_MAX_BLOCKS_IN_MCU
+ * to handle it.  We even let you do this from the jconfig.h file.  However,
+ * we strongly discourage changing C_MAX_BLOCKS_IN_MCU; just because Adobe
+ * sometimes emits noncompliant files doesn't mean you should too.
+ */
+#define C_MAX_BLOCKS_IN_MCU   10 /* compressor's limit on blocks per MCU */
+#ifndef D_MAX_BLOCKS_IN_MCU	 /* may be overridden in jconfig.h */
+#define D_MAX_BLOCKS_IN_MCU   10 /* decompressor's limit on blocks per MCU */
+#endif /* D_MAX_BLOCKS_IN_MCU */
+
+
+/* Data structures for images (arrays of samples and of DCT coefficients).
+ * On 80x86 machines, the image arrays are too big for near pointers,
+ * but the pointer arrays can fit in near memory.
+ */
+
+typedef JSAMPLE FAR *JSAMPROW;	/* ptr to one image row of pixel samples. */
+typedef JSAMPROW *JSAMPARRAY;	/* ptr to some rows (a 2-D sample array) */
+typedef JSAMPARRAY *JSAMPIMAGE;	/* a 3-D sample array: top index is color */
+
+typedef JCOEF JBLOCK[DCTSIZE2];	/* one block of DCTSIZE2 coefficients */
+typedef JBLOCK FAR *JBLOCKROW;	/* pointer to one row of coefficient blocks */
+typedef JBLOCKROW *JBLOCKARRAY;		/* a 2-D array of coefficient blocks */
+typedef JBLOCKARRAY *JBLOCKIMAGE;	/* a 3-D array of coefficient blocks */
+
+typedef JCOEF FAR *JCOEFPTR;	/* ptr to a coefficient; useful in a couple of places */
+
+
+/* Types for JPEG compression parameters and working tables. */
+
+
+/* DCT coefficient quantization tables. */
+
+typedef struct {
+  /* This array gives the coefficient quantizers in natural array order
+   * (not the zigzag order in which they are stored in a JPEG DQT marker).
+   * CAUTION: IJG versions prior to v6a kept this array in zigzag order.
+   */
+  UINT16 quantval[DCTSIZE2];	/* quantization step for each coefficient of a block */
+  /* This field is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;		/* TRUE when table has been output */
+} JQUANT_TBL;
+
+
+/* Huffman coding tables. */
+
+typedef struct {
+  /* These two fields directly represent the contents of a JPEG DHT marker */
+  UINT8 bits[17];		/* bits[k] = # of symbols with codes of */
+				/* length k bits (k = 1..16); bits[0] is unused */
+  UINT8 huffval[256];		/* The symbols, in order of increasing code length */
+  /* This field is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;		/* TRUE when table has been output */
+} JHUFF_TBL;
+
+
+/* Basic info about one component (color channel). */
+
+typedef struct {
+  /* These values are fixed over the whole image. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOF marker. */
+  int component_id;		/* identifier for this component (0..255) */
+  int component_index;		/* its index in SOF or cinfo->comp_info[] */
+  int h_samp_factor;		/* horizontal sampling factor (1..4) */
+  int v_samp_factor;		/* vertical sampling factor (1..4) */
+  int quant_tbl_no;		/* quantization table selector (0..3) */
+  /* These values may vary between scans. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOS marker. */
+  /* The decompressor output side may not use these variables. */
+  int dc_tbl_no;		/* DC entropy table selector (0..3) */
+  int ac_tbl_no;		/* AC entropy table selector (0..3) */
+
+  /* Remaining fields should be treated as private by applications. */
+
+  /* These values are computed during compression or decompression startup: */
+  /* Component's size in DCT blocks.
+   * Any dummy blocks added to complete an MCU are not counted; therefore
+   * these values do not depend on whether a scan is interleaved or not.
+   */
+  JDIMENSION width_in_blocks;
+  JDIMENSION height_in_blocks;
+  /* Size of a DCT block in samples,
+   * reflecting any scaling we choose to apply during the DCT step.
+   * Values from 1 to 16 are supported.
+   * Note that different components may receive different DCT scalings.
+   */
+  int DCT_h_scaled_size;
+  int DCT_v_scaled_size;
+  /* The downsampled dimensions are the component's actual, unpadded number
+   * of samples at the main buffer (preprocessing/compression interface);
+   * DCT scaling is included, so
+   * downsampled_width =
+   *   ceil(image_width * Hi/Hmax * DCT_h_scaled_size/block_size)
+   * and similarly for height.
+   */
+  JDIMENSION downsampled_width;	 /* actual width in samples */
+  JDIMENSION downsampled_height; /* actual height in samples */
+  /* For decompression, in cases where some of the components will be
+   * ignored (e.g. grayscale output from YCbCr image), we can skip most
+   * computations for the unused components.
+   * For compression, some of the components will need further quantization
+   * scale by factor of 2 after DCT (e.g. BG_YCC output from normal RGB input).
+   * The field is first set TRUE for decompression, FALSE for compression
+   * in initial_setup, and then adapted in color conversion setup.
+   */
+  boolean component_needed;
+
+  /* These values are computed before starting a scan of the component. */
+  /* The decompressor output side may not use these variables. */
+  int MCU_width;		/* number of blocks per MCU, horizontally */
+  int MCU_height;		/* number of blocks per MCU, vertically */
+  int MCU_blocks;		/* total blocks per MCU: MCU_width * MCU_height */
+  int MCU_sample_width;	/* MCU width in samples: MCU_width * DCT_h_scaled_size */
+  int last_col_width;		/* # of non-dummy blocks across in last MCU */
+  int last_row_height;		/* # of non-dummy blocks down in last MCU */
+
+  /* Saved quantization table for component; NULL if none yet saved.
+   * See jdinput.c comments about the need for this information.
+   * This field is currently used only for decompression.
+   */
+  JQUANT_TBL * quant_table;
+
+  /* Private per-component storage for DCT or IDCT subsystem. */
+  void * dct_table;
+} jpeg_component_info;
+
+
+/* The script for encoding a multiple-scan file is an array of these: */
+
+typedef struct {
+  int comps_in_scan;		/* number of components encoded in this scan */
+  int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes; room for MAX_COMPS_IN_SCAN entries */
+  int Ss, Se;			/* progressive JPEG spectral selection parms */
+  int Ah, Al;			/* progressive JPEG successive approx. parms */
+} jpeg_scan_info;
+
+/* The decompressor can save APPn and COM markers in a list of these: */
+
+typedef struct jpeg_marker_struct FAR * jpeg_saved_marker_ptr;
+
+struct jpeg_marker_struct {
+  jpeg_saved_marker_ptr next;	/* next saved marker in list, or NULL */
+  UINT8 marker;			/* marker code: JPEG_COM, or JPEG_APP0+n */
+  unsigned int original_length;	/* # bytes of data in the file */
+  unsigned int data_length;	/* # bytes of data saved at data[] */
+  JOCTET FAR * data;		/* the data contained in the marker */
+  /* The marker length word is not counted in data_length or original_length. */
+};
+
+/* Known color spaces. */
+
+typedef enum {
+	JCS_UNKNOWN,		/* error/unspecified */
+	JCS_GRAYSCALE,		/* monochrome */
+	JCS_RGB,		/* red/green/blue, standard RGB (sRGB) */
+	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV), standard YCC */
+	JCS_CMYK,		/* C/M/Y/K */
+	JCS_YCCK,		/* Y/Cb/Cr/K */
+	JCS_BG_RGB,		/* big gamut red/green/blue, bg-sRGB */
+	JCS_BG_YCC		/* big gamut Y/Cb/Cr, bg-sYCC */
+} J_COLOR_SPACE;		/* type of the in_color_space and jpeg_color_space fields */
+
+/* Supported color transforms. */
+
+typedef enum {
+	JCT_NONE           = 0,	/* no color transform */
+	JCT_SUBTRACT_GREEN = 1	/* "subtract green" transform -- details are not given in this header */
+} J_COLOR_TRANSFORM;
+
+/* DCT/IDCT algorithm options. */
+
+typedef enum {
+	JDCT_ISLOW,		/* slow but accurate integer algorithm */
+	JDCT_IFAST,		/* faster, less accurate integer method */
+	JDCT_FLOAT		/* floating-point: accurate, fast on fast HW */
+} J_DCT_METHOD;
+
+#ifndef JDCT_DEFAULT		/* may be overridden in jconfig.h */
+#define JDCT_DEFAULT  JDCT_ISLOW
+#endif /* JDCT_DEFAULT */
+#ifndef JDCT_FASTEST		/* may be overridden in jconfig.h */
+#define JDCT_FASTEST  JDCT_IFAST
+#endif /* JDCT_FASTEST */
+
+/* Dithering options for decompression. */
+
+typedef enum {
+	JDITHER_NONE,		/* no dithering */
+	JDITHER_ORDERED,	/* simple ordered dither */
+	JDITHER_FS		/* Floyd-Steinberg error diffusion dither */
+} J_DITHER_MODE;		/* dithering options for decompression */
+
+
+/* Common fields between JPEG compression and decompression master structs. */
+
+#define jpeg_common_fields \
+  struct jpeg_error_mgr * err;	/* Error handler module */\
+  struct jpeg_memory_mgr * mem;	/* Memory manager module */\
+  struct jpeg_progress_mgr * progress; /* Progress monitor, or NULL if none */\
+  void * client_data;		/* Available for use by application */\
+  boolean is_decompressor;	/* So common code can tell which is which */\
+  int global_state		/* For checking call sequence validity; no ';' here, the use site supplies it */
+
+/* Routines that are to be used by both halves of the library are declared
+ * to receive a pointer to this structure.  There are no actual instances of
+ * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct.
+ */
+struct jpeg_common_struct {
+  jpeg_common_fields;		/* Fields common to both master struct types */
+  /* Additional fields follow in an actual jpeg_compress_struct or
+   * jpeg_decompress_struct.  All three structs must agree on these
+   * initial fields, which is why they come from one shared macro.
+   */
+};
+
+typedef struct jpeg_common_struct * j_common_ptr;	/* for routines shared by both halves of the library */
+typedef struct jpeg_compress_struct * j_compress_ptr;	/* ptr to a compression master record */
+typedef struct jpeg_decompress_struct * j_decompress_ptr;	/* ptr to a decompression master record */
+
+
+/* Master record for a compression instance */
+
+struct jpeg_compress_struct {
+  jpeg_common_fields;		/* Fields shared with jpeg_decompress_struct */
+
+  /* Destination for compressed data */
+  struct jpeg_destination_mgr * dest;
+
+  /* Description of source image --- these fields must be filled in by
+   * outer application before starting compression.  in_color_space must
+   * be correct before you can even call jpeg_set_defaults().
+   */
+
+  JDIMENSION image_width;	/* input image width */
+  JDIMENSION image_height;	/* input image height */
+  int input_components;		/* # of color components in input image */
+  J_COLOR_SPACE in_color_space;	/* colorspace of input image */
+
+  double input_gamma;		/* image gamma of input image */
+
+  /* Compression parameters --- these fields must be set before calling
+   * jpeg_start_compress().  We recommend calling jpeg_set_defaults() to
+   * initialize everything to reasonable defaults, then changing anything
+   * the application specifically wants to change.  That way you won't get
+   * burnt when new parameters are added.  Also note that there are several
+   * helper routines to simplify changing parameters.
+   */
+
+  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
+
+  JDIMENSION jpeg_width;	/* scaled JPEG image width */
+  JDIMENSION jpeg_height;	/* scaled JPEG image height */
+  /* Dimensions of actual JPEG image that will be written to file,
+   * derived from input dimensions by scaling factors above.
+   * These fields are computed by jpeg_start_compress().
+   * You can also use jpeg_calc_jpeg_dimensions() to determine these values
+   * in advance of calling jpeg_start_compress().
+   */
+
+  int data_precision;		/* bits of precision in image data */
+
+  int num_components;		/* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  jpeg_component_info * comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+
+  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
+  int q_scale_factor[NUM_QUANT_TBLS];
+  /* ptrs to coefficient quantization tables, or NULL if not defined,
+   * and corresponding scale factors (percentage, initialized 100).
+   */
+
+  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  int num_scans;		/* # of entries in scan_info array */
+  const jpeg_scan_info * scan_info; /* script for multi-scan file, or NULL */
+  /* The default value of scan_info is NULL, which causes a single-scan
+   * sequential JPEG file to be emitted.  To create a multi-scan file,
+   * set num_scans and scan_info to point to an array of scan definitions.
+   */
+
+  boolean raw_data_in;		/* TRUE=caller supplies downsampled data */
+  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+  boolean optimize_coding;	/* TRUE=optimize entropy encoding parms */
+  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+  boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */
+  int smoothing_factor;		/* 1..100, or 0 for no input smoothing */
+  J_DCT_METHOD dct_method;	/* DCT algorithm selector */
+
+  /* The restart interval can be specified in absolute MCUs by setting
+   * restart_interval, or in MCU rows by setting restart_in_rows
+   * (in which case the correct restart_interval will be figured
+   * for each scan).
+   */
+  unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */
+  int restart_in_rows;		/* if > 0, MCU rows per restart interval */
+
+  /* Parameters controlling emission of special markers. */
+
+  boolean write_JFIF_header;	/* should a JFIF marker be written? */
+  UINT8 JFIF_major_version;	/* What to write for the JFIF version number */
+  UINT8 JFIF_minor_version;	/* minor part of the same version number */
+  /* The three density fields below are not used by the JPEG code, merely */
+  /* copied into the JFIF APP0 marker.  density_unit can be 0 for unknown, */
+  /* 1 for dots/inch, or 2 for dots/cm.  Note that the pixel aspect */
+  /* ratio is defined by X_density/Y_density even when density_unit=0. */
+  UINT8 density_unit;		/* JFIF code for pixel size units */
+  UINT16 X_density;		/* Horizontal pixel density */
+  UINT16 Y_density;		/* Vertical pixel density */
+  boolean write_Adobe_marker;	/* should an Adobe marker be written? */
+
+  J_COLOR_TRANSFORM color_transform;
+  /* Color transform identifier, writes LSE marker if nonzero */
+
+  /* State variable: index of next scanline to be written to
+   * jpeg_write_scanlines().  Application may use this to control its
+   * processing loop, e.g., "while (next_scanline < image_height)".
+   */
+
+  JDIMENSION next_scanline;	/* 0 .. image_height-1 */
+
+  /* Remaining fields are known throughout compressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during compression startup
+   */
+  boolean progressive_mode;	/* TRUE if scan script uses progressive mode */
+  int max_h_samp_factor;	/* largest h_samp_factor */
+  int max_v_samp_factor;	/* largest v_samp_factor */
+
+  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
+  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
+
+  JDIMENSION total_iMCU_rows;	/* # of iMCU rows to be input to coef ctlr */
+  /* The coefficient controller receives data in units of MCU rows as defined
+   * for fully interleaved scans (whether the JPEG file is interleaved or not).
+   * There are v_samp_factor * DCT_v_scaled_size sample rows of each component
+   * in an "iMCU" (interleaved MCU) row.
+   */
+
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   */
+  int comps_in_scan;		/* # of JPEG components in this scan */
+  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+
+  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+
+  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int MCU_membership[C_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th block in an MCU */
+
+  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+
+  int block_size;		/* the basic DCT block size: 1..16 */
+  const int * natural_order;	/* natural-order position array */
+  int lim_Se;			/* min( Se, DCTSIZE2-1 ) */
+
+  /*
+   * Links to compression subobjects (methods and private variables of modules)
+   */
+  struct jpeg_comp_master * master;
+  struct jpeg_c_main_controller * main;
+  struct jpeg_c_prep_controller * prep;
+  struct jpeg_c_coef_controller * coef;
+  struct jpeg_marker_writer * marker;
+  struct jpeg_color_converter * cconvert;
+  struct jpeg_downsampler * downsample;
+  struct jpeg_forward_dct * fdct;
+  struct jpeg_entropy_encoder * entropy;
+  jpeg_scan_info * script_space; /* workspace for jpeg_simple_progression */
+  int script_space_size;	/* size of script_space (units not shown here -- confirm) */
+};
+
+
+/* Master record for a decompression instance */
+
+struct jpeg_decompress_struct {
+  jpeg_common_fields;		/* Fields shared with jpeg_compress_struct */
+
+  /* Source of compressed data */
+  struct jpeg_source_mgr * src;
+
+  /* Basic description of image --- filled in by jpeg_read_header(). */
+  /* Application may inspect these values to decide how to process image. */
+
+  JDIMENSION image_width;	/* nominal image width (from SOF marker) */
+  JDIMENSION image_height;	/* nominal image height */
+  int num_components;		/* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  /* Decompression processing parameters --- these fields must be set before
+   * calling jpeg_start_decompress().  Note that jpeg_read_header() initializes
+   * them to default values.
+   */
+
+  J_COLOR_SPACE out_color_space; /* colorspace for output */
+
+  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
+
+  double output_gamma;		/* image gamma wanted in output */
+
+  boolean buffered_image;	/* TRUE=multiple output passes */
+  boolean raw_data_out;		/* TRUE=downsampled data wanted */
+
+  J_DCT_METHOD dct_method;	/* IDCT algorithm selector */
+  boolean do_fancy_upsampling;	/* TRUE=apply fancy upsampling */
+  boolean do_block_smoothing;	/* TRUE=apply interblock smoothing */
+
+  boolean quantize_colors;	/* TRUE=colormapped output wanted */
+  /* the following are ignored if not quantize_colors: */
+  J_DITHER_MODE dither_mode;	/* type of color dithering to use */
+  boolean two_pass_quantize;	/* TRUE=use two-pass color quantization */
+  int desired_number_of_colors;	/* max # colors to use in created colormap */
+  /* these are significant only in buffered-image mode: */
+  boolean enable_1pass_quant;	/* enable future use of 1-pass quantizer */
+  boolean enable_external_quant;/* enable future use of external colormap */
+  boolean enable_2pass_quant;	/* enable future use of 2-pass quantizer */
+
+  /* Description of actual output image that will be returned to application.
+   * These fields are computed by jpeg_start_decompress().
+   * You can also use jpeg_calc_output_dimensions() to determine these values
+   * in advance of calling jpeg_start_decompress().
+   */
+
+  JDIMENSION output_width;	/* scaled image width */
+  JDIMENSION output_height;	/* scaled image height */
+  int out_color_components;	/* # of color components in out_color_space */
+  int output_components;	/* # of color components returned */
+  /* output_components is 1 (a colormap index) when quantizing colors;
+   * otherwise it equals out_color_components.
+   */
+  int rec_outbuf_height;	/* min recommended height of scanline buffer */
+  /* If the buffer passed to jpeg_read_scanlines() is less than this many rows
+   * high, space and time will be wasted due to unnecessary data copying.
+   * Usually rec_outbuf_height will be 1 or 2, at most 4.
+   */
+
+  /* When quantizing colors, the output colormap is described by these fields.
+   * The application can supply a colormap by setting colormap non-NULL before
+   * calling jpeg_start_decompress; otherwise a colormap is created during
+   * jpeg_start_decompress or jpeg_start_output.
+   * The map has out_color_components rows and actual_number_of_colors columns.
+   */
+  int actual_number_of_colors;	/* number of entries in use */
+  JSAMPARRAY colormap;		/* The color map as a 2-D pixel array */
+
+  /* State variables: these variables indicate the progress of decompression.
+   * The application may examine these but must not modify them.
+   */
+
+  /* Row index of next scanline to be read from jpeg_read_scanlines().
+   * Application may use this to control its processing loop, e.g.,
+   * "while (output_scanline < output_height)".
+   */
+  JDIMENSION output_scanline;	/* 0 .. output_height-1 */
+
+  /* Current input scan number and number of iMCU rows completed in scan.
+   * These indicate the progress of the decompressor input side.
+   */
+  int input_scan_number;	/* Number of SOS markers seen so far */
+  JDIMENSION input_iMCU_row;	/* Number of iMCU rows completed */
+
+  /* The "output scan number" is the notional scan being displayed by the
+   * output side.  The decompressor will not allow output scan/row number
+   * to get ahead of input scan/row, but it can fall arbitrarily far behind.
+   */
+  int output_scan_number;	/* Nominal scan number being displayed */
+  JDIMENSION output_iMCU_row;	/* Number of iMCU rows read */
+
+  /* Current progression status.  coef_bits[c][i] indicates the precision
+   * with which component c's DCT coefficient i (in zigzag order) is known.
+   * It is -1 when no data has yet been received, otherwise it is the point
+   * transform (shift) value for the most recent scan of the coefficient
+   * (thus, 0 at completion of the progression).
+   * This pointer is NULL when reading a non-progressive file.
+   */
+  int (*coef_bits)[DCTSIZE2];	/* -1 or current Al value for each coef */
+
+  /* Internal JPEG parameters --- the application usually need not look at
+   * these fields.  Note that the decompressor output side may not use
+   * any parameters that can change between scans.
+   */
+
+  /* Quantization and Huffman tables are carried forward across input
+   * datastreams when processing abbreviated JPEG datastreams.
+   */
+
+  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
+  /* ptrs to coefficient quantization tables, or NULL if not defined */
+
+  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+
+  /* These parameters are never carried across datastreams, since they
+   * are given in SOF/SOS markers or defined to be reset by SOI.
+   */
+
+  int data_precision;		/* bits of precision in image data */
+
+  jpeg_component_info * comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+
+  boolean is_baseline;		/* TRUE if Baseline SOF0 encountered */
+  boolean progressive_mode;	/* TRUE if SOFn specifies progressive mode */
+  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  unsigned int restart_interval; /* MCUs per restart interval, or 0 for no restart */
+
+  /* These fields record data obtained from optional markers recognized by
+   * the JPEG library.
+   */
+  boolean saw_JFIF_marker;	/* TRUE iff a JFIF APP0 marker was found */
+  /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */
+  UINT8 JFIF_major_version;	/* JFIF version number */
+  UINT8 JFIF_minor_version;	/* minor part of the same version number */
+  UINT8 density_unit;		/* JFIF code for pixel size units */
+  UINT16 X_density;		/* Horizontal pixel density */
+  UINT16 Y_density;		/* Vertical pixel density */
+  boolean saw_Adobe_marker;	/* TRUE iff an Adobe APP14 marker was found */
+  UINT8 Adobe_transform;	/* Color transform code from Adobe marker */
+
+  J_COLOR_TRANSFORM color_transform;
+  /* Color transform identifier derived from LSE marker, otherwise zero */
+
+  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+
+  /* Aside from the specific data retained from APPn markers known to the
+   * library, the uninterpreted contents of any or all APPn and COM markers
+   * can be saved in a list for examination by the application.
+   */
+  jpeg_saved_marker_ptr marker_list; /* Head of list of saved markers */
+
+  /* Remaining fields are known throughout decompressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during decompression startup
+   */
+  int max_h_samp_factor;	/* largest h_samp_factor */
+  int max_v_samp_factor;	/* largest v_samp_factor */
+
+  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
+  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
+
+  JDIMENSION total_iMCU_rows;	/* # of iMCU rows in image */
+  /* The coefficient controller's input and output progress is measured in
+   * units of "iMCU" (interleaved MCU) rows.  These are the same as MCU rows
+   * in fully interleaved JPEG scans, but are used whether the scan is
+   * interleaved or not.  We define an iMCU row as v_samp_factor DCT block
+   * rows of each component.  Therefore, the IDCT output contains
+   * v_samp_factor * DCT_v_scaled_size sample rows of a component per iMCU row.
+   */
+
+  JSAMPLE * sample_range_limit; /* table for fast range-limiting */
+
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   * Note that the decompressor output side must not use these fields.
+   */
+  int comps_in_scan;		/* # of JPEG components in this scan */
+  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+
+  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+
+  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int MCU_membership[D_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th block in an MCU */
+
+  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+
+  /* These fields are derived from Se of first SOS marker.
+   */
+  int block_size;		/* the basic DCT block size: 1..16 */
+  const int * natural_order; /* natural-order position array for entropy decode */
+  int lim_Se;			/* min( Se, DCTSIZE2-1 ) for entropy decode */
+
+  /* This field is shared between entropy decoder and marker parser.
+   * It is either zero or the code of a JPEG marker that has been
+   * read from the data source, but has not yet been processed.
+   */
+  int unread_marker;
+
+  /*
+   * Links to decompression subobjects (methods, private variables of modules)
+   */
+  struct jpeg_decomp_master * master;
+  struct jpeg_d_main_controller * main;
+  struct jpeg_d_coef_controller * coef;
+  struct jpeg_d_post_controller * post;
+  struct jpeg_input_controller * inputctl;
+  struct jpeg_marker_reader * marker;
+  struct jpeg_entropy_decoder * entropy;
+  struct jpeg_inverse_dct * idct;
+  struct jpeg_upsampler * upsample;
+  struct jpeg_color_deconverter * cconvert;
+  struct jpeg_color_quantizer * cquantize;
+};
+
+
+/* "Object" declarations for JPEG modules that may be supplied or called
+ * directly by the surrounding application.
+ * As with all objects in the JPEG library, these structs only define the
+ * publicly visible methods and state variables of a module.  Additional
+ * private fields may exist after the public ones.
+ */
+
+
+/* Error handler object: 5 methods, last-message state, message tables */
+
+struct jpeg_error_mgr {
+  /* Error exit handler: does not return to caller */
+  JMETHOD(noreturn_t, error_exit, (j_common_ptr cinfo));
+  /* Conditionally emit a trace or warning message */
+  JMETHOD(void, emit_message, (j_common_ptr cinfo, int msg_level));
+  /* Routine that actually outputs a trace or error message */
+  JMETHOD(void, output_message, (j_common_ptr cinfo));
+  /* Format a message string for the most recent JPEG error or message */
+  JMETHOD(void, format_message, (j_common_ptr cinfo, char * buffer));
+#define JMSG_LENGTH_MAX  200	/* recommended size of format_message buffer */
+  /* Reset error state variables at start of a new image */
+  JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo));
+
+  /* The message ID code and any parameters are saved here.
+   * A message can have one string parameter or up to 8 int parameters.
+   */
+  int msg_code;
+#define JMSG_STR_PARM_MAX  80	/* size of msg_parm.s, in chars */
+  union {
+    int i[8];
+    char s[JMSG_STR_PARM_MAX];
+  } msg_parm;
+
+  /* Standard state variables for error facility */
+
+  int trace_level;		/* max msg_level that will be displayed */
+
+  /* For recoverable corrupt-data errors, we emit a warning message,
+   * but keep going unless emit_message chooses to abort.  emit_message
+   * should count warnings in num_warnings.  The surrounding application
+   * can check for bad data by seeing if num_warnings is nonzero at the
+   * end of processing.
+   */
+  long num_warnings;		/* number of corrupt-data warnings */
+
+  /* These fields point to the table(s) of error message strings.
+   * An application can change the table pointer to switch to a different
+   * message list (typically, to change the language in which errors are
+   * reported).  Some applications may wish to add additional error codes
+   * that will be handled by the JPEG library error mechanism; the second
+   * table pointer is used for this purpose.
+   *
+   * First table includes all errors generated by JPEG library itself.
+   * Error code 0 is reserved for a "no such error string" message.
+   */
+  const char * const * jpeg_message_table; /* Library errors */
+  int last_jpeg_message;    /* Table contains strings 0..last_jpeg_message */
+  /* Second table can be added by application (see cjpeg/djpeg for example).
+   * It contains strings numbered first_addon_message..last_addon_message.
+   */
+  const char * const * addon_message_table; /* Non-library errors */
+  int first_addon_message;	/* code for first string in addon table */
+  int last_addon_message;	/* code for last string in addon table */
+};
+
+
+/* Progress monitor object: one method plus per-pass and overall counters */
+
+struct jpeg_progress_mgr {
+  JMETHOD(void, progress_monitor, (j_common_ptr cinfo));
+
+  long pass_counter;		/* work units completed in this pass */
+  long pass_limit;		/* total number of work units in this pass */
+  int completed_passes;		/* passes completed so far */
+  int total_passes;		/* total number of passes expected */
+};
+
+
+/* Data destination object for compression: output buffer state + 3 methods */
+
+struct jpeg_destination_mgr {
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+
+  JMETHOD(void, init_destination, (j_compress_ptr cinfo));
+  JMETHOD(boolean, empty_output_buffer, (j_compress_ptr cinfo));
+  JMETHOD(void, term_destination, (j_compress_ptr cinfo));
+};
+
+
+/* Data source object for decompression: input buffer state + 5 methods */
+
+struct jpeg_source_mgr {
+  const JOCTET * next_input_byte; /* => next byte to read from buffer */
+  size_t bytes_in_buffer;	/* # of bytes remaining in buffer */
+
+  JMETHOD(void, init_source, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo));
+  JMETHOD(void, skip_input_data, (j_decompress_ptr cinfo, long num_bytes));
+  JMETHOD(boolean, resync_to_restart, (j_decompress_ptr cinfo, int desired));
+  JMETHOD(void, term_source, (j_decompress_ptr cinfo));
+};
+
+
+/* Memory manager object.
+ * Allocates "small" objects (a few K total), "large" objects (tens of K),
+ * and "really big" objects (virtual arrays with backing store if needed).
+ * The memory manager does not allow individual objects to be freed; rather,
+ * each created object is assigned to a pool, and whole pools can be freed
+ * at once.  This is faster and more convenient than remembering exactly what
+ * to free, especially where malloc()/free() are not too speedy.
+ * NB: alloc routines never return NULL.  They exit to error_exit if not
+ * successful.
+ */
+
+#define JPOOL_PERMANENT	0	/* lasts until master record is destroyed */
+#define JPOOL_IMAGE	1	/* lasts until done with image/datastream */
+#define JPOOL_NUMPOOLS	2	/* count of the pool ids defined above */
+
+typedef struct jvirt_sarray_control * jvirt_sarray_ptr;	/* virtual sample array handle */
+typedef struct jvirt_barray_control * jvirt_barray_ptr;	/* virtual block array handle */
+
+
+struct jpeg_memory_mgr {
+  /* Method pointers; each one takes a j_common_ptr cinfo first */
+  JMETHOD(void *, alloc_small, (j_common_ptr cinfo, int pool_id,
+				size_t sizeofobject));
+  JMETHOD(void FAR *, alloc_large, (j_common_ptr cinfo, int pool_id,
+				     size_t sizeofobject));
+  JMETHOD(JSAMPARRAY, alloc_sarray, (j_common_ptr cinfo, int pool_id,
+				     JDIMENSION samplesperrow,
+				     JDIMENSION numrows));
+  JMETHOD(JBLOCKARRAY, alloc_barray, (j_common_ptr cinfo, int pool_id,
+				      JDIMENSION blocksperrow,
+				      JDIMENSION numrows));
+  JMETHOD(jvirt_sarray_ptr, request_virt_sarray, (j_common_ptr cinfo,
+						  int pool_id,
+						  boolean pre_zero,
+						  JDIMENSION samplesperrow,
+						  JDIMENSION numrows,
+						  JDIMENSION maxaccess));
+  JMETHOD(jvirt_barray_ptr, request_virt_barray, (j_common_ptr cinfo,
+						  int pool_id,
+						  boolean pre_zero,
+						  JDIMENSION blocksperrow,
+						  JDIMENSION numrows,
+						  JDIMENSION maxaccess));
+  JMETHOD(void, realize_virt_arrays, (j_common_ptr cinfo));
+  JMETHOD(JSAMPARRAY, access_virt_sarray, (j_common_ptr cinfo,
+					   jvirt_sarray_ptr ptr,
+					   JDIMENSION start_row,
+					   JDIMENSION num_rows,
+					   boolean writable));
+  JMETHOD(JBLOCKARRAY, access_virt_barray, (j_common_ptr cinfo,
+					    jvirt_barray_ptr ptr,
+					    JDIMENSION start_row,
+					    JDIMENSION num_rows,
+					    boolean writable));
+  JMETHOD(void, free_pool, (j_common_ptr cinfo, int pool_id));
+  JMETHOD(void, self_destruct, (j_common_ptr cinfo));
+
+  /* Limit on memory allocation for this JPEG object.  (Note that this is
+   * merely advisory, not a guaranteed maximum; it only affects the space
+   * used for virtual-array buffers.)  May be changed by outer application
+   * after creating the JPEG object.
+   */
+  long max_memory_to_use;
+
+  /* Maximum allocation request accepted by alloc_large. */
+  long max_alloc_chunk;
+};
+
+
+/* Signature of application-supplied marker processing methods (see
+ * jpeg_set_marker_processor); marker code is in cinfo->unread_marker.
+ */
+typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
+
+
+/* Declarations for routines called by application.
+ * JPP(arglist) yields arglist if HAVE_PROTOTYPES is defined, else (), hiding
+ * prototype parameters from compilers that can't cope.  Use double parens.
+ */
+
+#ifdef HAVE_PROTOTYPES
+#define JPP(arglist)	arglist
+#else
+#define JPP(arglist)	()
+#endif
+
+
+/* Short forms of external names for systems with brain-damaged linkers.
+ * We shorten external names to be unique in the first six letters, which
+ * is good enough for all known systems.
+ * (If your compiler itself needs names to be unique in fewer than 15
+ * characters, you are out of luck.  Get a better compiler.)
+ */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_std_error		jStdError
+#define jpeg_CreateCompress	jCreaCompress
+#define jpeg_CreateDecompress	jCreaDecompress
+#define jpeg_destroy_compress	jDestCompress
+#define jpeg_destroy_decompress	jDestDecompress
+#define jpeg_stdio_dest		jStdDest
+#define jpeg_stdio_src		jStdSrc
+#define jpeg_mem_dest		jMemDest
+#define jpeg_mem_src		jMemSrc
+#define jpeg_set_defaults	jSetDefaults
+#define jpeg_set_colorspace	jSetColorspace
+#define jpeg_default_colorspace	jDefColorspace
+#define jpeg_set_quality	jSetQuality
+#define jpeg_set_linear_quality	jSetLQuality
+#define jpeg_default_qtables	jDefQTables
+#define jpeg_add_quant_table	jAddQuantTable
+#define jpeg_quality_scaling	jQualityScaling
+#define jpeg_simple_progression	jSimProgress
+#define jpeg_suppress_tables	jSuppressTables
+#define jpeg_alloc_quant_table	jAlcQTable
+#define jpeg_alloc_huff_table	jAlcHTable
+#define jpeg_start_compress	jStrtCompress
+#define jpeg_write_scanlines	jWrtScanlines
+#define jpeg_finish_compress	jFinCompress
+#define jpeg_calc_jpeg_dimensions	jCjpegDimensions
+#define jpeg_write_raw_data	jWrtRawData
+#define jpeg_write_marker	jWrtMarker
+#define jpeg_write_m_header	jWrtMHeader
+#define jpeg_write_m_byte	jWrtMByte
+#define jpeg_write_tables	jWrtTables
+#define jpeg_read_header	jReadHeader
+#define jpeg_start_decompress	jStrtDecompress
+#define jpeg_read_scanlines	jReadScanlines
+#define jpeg_finish_decompress	jFinDecompress
+#define jpeg_read_raw_data	jReadRawData
+#define jpeg_has_multiple_scans	jHasMultScn
+#define jpeg_start_output	jStrtOutput
+#define jpeg_finish_output	jFinOutput
+#define jpeg_input_complete	jInComplete
+#define jpeg_new_colormap	jNewCMap
+#define jpeg_consume_input	jConsumeInput
+#define jpeg_core_output_dimensions	jCoreDimensions
+#define jpeg_calc_output_dimensions	jCalcDimensions
+#define jpeg_save_markers	jSaveMarkers
+#define jpeg_set_marker_processor	jSetMarker
+#define jpeg_read_coefficients	jReadCoefs
+#define jpeg_write_coefficients	jWrtCoefs
+#define jpeg_copy_critical_parameters	jCopyCrit
+#define jpeg_abort_compress	jAbrtCompress
+#define jpeg_abort_decompress	jAbrtDecompress
+#define jpeg_abort		jAbort
+#define jpeg_destroy		jDestroy
+#define jpeg_resync_to_restart	jResyncRestart
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* Default error-management setup; takes and returns a jpeg_error_mgr pointer */
+EXTERN(struct jpeg_error_mgr *) jpeg_std_error
+	JPP((struct jpeg_error_mgr * err));
+
+/* Initialization of JPEG compression and decompression objects.
+ * jpeg_create_compress() and jpeg_create_decompress() are the exported
+ * names that applications should call.  These expand to calls on
+ * jpeg_CreateCompress and jpeg_CreateDecompress with additional information
+ * passed for version mismatch checking.
+ * NB: you must set up the error-manager BEFORE calling jpeg_create_xxx.
+ */
+#define jpeg_create_compress(cinfo) \
+    jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \
+			(size_t) sizeof(struct jpeg_compress_struct))
+#define jpeg_create_decompress(cinfo) \
+    jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
+			  (size_t) sizeof(struct jpeg_decompress_struct))
+EXTERN(void) jpeg_CreateCompress JPP((j_compress_ptr cinfo,
+				      int version, size_t structsize));
+EXTERN(void) jpeg_CreateDecompress JPP((j_decompress_ptr cinfo,
+					int version, size_t structsize));
+/* Destruction of JPEG compression and decompression objects */
+EXTERN(void) jpeg_destroy_compress JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo));
+
+/* Standard data source and destination managers: stdio streams. */
+/* Caller is responsible for opening the file before and closing after. */
+EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile));
+EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile));
+
+/* Memory-buffer managers; jpeg_mem_dest takes buffer and size by address. */
+EXTERN(void) jpeg_mem_dest JPP((j_compress_ptr cinfo,
+			       unsigned char ** outbuffer,
+			       unsigned long * outsize));
+EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo,
+			      const unsigned char * inbuffer,
+			      unsigned long insize));
+
+/* Default parameter setup for compression */
+EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo));
+/* Compression parameter setup aids (the two alloc routines take j_common_ptr) */
+EXTERN(void) jpeg_set_colorspace JPP((j_compress_ptr cinfo,
+				      J_COLOR_SPACE colorspace));
+EXTERN(void) jpeg_default_colorspace JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality,
+				   boolean force_baseline));
+EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
+					  int scale_factor,
+					  boolean force_baseline));
+EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo,
+				       boolean force_baseline));
+EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
+				       const unsigned int *basic_table,
+				       int scale_factor,
+				       boolean force_baseline));
+EXTERN(int) jpeg_quality_scaling JPP((int quality));
+EXTERN(void) jpeg_simple_progression JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_suppress_tables JPP((j_compress_ptr cinfo,
+				       boolean suppress));
+EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table JPP((j_common_ptr cinfo));
+EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table JPP((j_common_ptr cinfo));
+
+/* Main entry points for compression */
+EXTERN(void) jpeg_start_compress JPP((j_compress_ptr cinfo,
+				      boolean write_all_tables));
+EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo,
+					     JSAMPARRAY scanlines,
+					     JDIMENSION num_lines));
+EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
+
+/* Precalculate JPEG dimensions for current compression parameters. */
+EXTERN(void) jpeg_calc_jpeg_dimensions JPP((j_compress_ptr cinfo));
+
+/* Replaces jpeg_write_scanlines when writing raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
+					    JSAMPIMAGE data,
+					    JDIMENSION num_lines));
+
+/* Write a special marker.  See libjpeg.txt concerning safe usage. */
+EXTERN(void) jpeg_write_marker
+	JPP((j_compress_ptr cinfo, int marker,
+	     const JOCTET * dataptr, unsigned int datalen));
+/* Same, but piecemeal: jpeg_write_m_header, then jpeg_write_m_byte calls. */
+EXTERN(void) jpeg_write_m_header
+	JPP((j_compress_ptr cinfo, int marker, unsigned int datalen));
+EXTERN(void) jpeg_write_m_byte
+	JPP((j_compress_ptr cinfo, int val));
+
+/* Alternate compression function: just write an abbreviated table file */
+EXTERN(void) jpeg_write_tables JPP((j_compress_ptr cinfo));
+
+/* Decompression startup: read start of JPEG datastream to see what's there */
+EXTERN(int) jpeg_read_header JPP((j_decompress_ptr cinfo,
+				  boolean require_image));
+/* Return value of jpeg_read_header is one of: */
+#define JPEG_SUSPENDED		0 /* Suspended due to lack of input data */
+#define JPEG_HEADER_OK		1 /* Found valid image datastream */
+#define JPEG_HEADER_TABLES_ONLY	2 /* Found valid table-specs-only datastream */
+/* If you pass require_image = TRUE (normal case), you need not check for
+ * a TABLES_ONLY return code; an abbreviated file will cause an error exit.
+ * JPEG_SUSPENDED is only possible if you use a data source module that can
+ * give a suspension return (the stdio source module doesn't).
+ */
+
+/* Main entry points for decompression */
+EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
+					    JSAMPARRAY scanlines,
+					    JDIMENSION max_lines));
+EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo));
+
+/* Replaces jpeg_read_scanlines when reading raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_read_raw_data JPP((j_decompress_ptr cinfo,
+					   JSAMPIMAGE data,
+					   JDIMENSION max_lines));
+
+/* Additional entry points for buffered-image mode. */
+EXTERN(boolean) jpeg_has_multiple_scans JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_start_output JPP((j_decompress_ptr cinfo,
+				       int scan_number));
+EXTERN(boolean) jpeg_finish_output JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_input_complete JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_new_colormap JPP((j_decompress_ptr cinfo));
+EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo));
+/* Return value of jpeg_consume_input is one of: */
+/* #define JPEG_SUSPENDED	0    (already defined above) Suspended due to lack of input data */
+#define JPEG_REACHED_SOS	1 /* Reached start of new scan */
+#define JPEG_REACHED_EOI	2 /* Reached end of image */
+#define JPEG_ROW_COMPLETED	3 /* Completed one iMCU row */
+#define JPEG_SCAN_COMPLETED	4 /* Completed last iMCU row of a scan */
+
+/* Precalculate output dimensions for current decompression parameters. */
+EXTERN(void) jpeg_core_output_dimensions JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo));
+
+/* Control saving of COM and APPn markers into marker_list. */
+EXTERN(void) jpeg_save_markers
+	JPP((j_decompress_ptr cinfo, int marker_code,
+	     unsigned int length_limit));
+
+/* Install a special processing method for COM or APPn markers. */
+EXTERN(void) jpeg_set_marker_processor
+	JPP((j_decompress_ptr cinfo, int marker_code,
+	     jpeg_marker_parser_method routine));
+
+/* Read or write raw DCT coefficients --- useful for lossless transcoding. */
+EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo,
+					  jvirt_barray_ptr * coef_arrays));
+EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo,
+						j_compress_ptr dstinfo));
+
+/* If you choose to abort compression or decompression before completing
+ * jpeg_finish_(de)compress, then you need to clean up to release memory,
+ * temporary files, etc.  You can just call jpeg_destroy_(de)compress
+ * if you're done with the JPEG object, but if you want to clean it up and
+ * reuse it, call this:
+ */
+EXTERN(void) jpeg_abort_compress JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_abort_decompress JPP((j_decompress_ptr cinfo));
+
+/* Generic versions of jpeg_abort and jpeg_destroy that work on either
+ * flavor of JPEG object.  These may be more convenient in some places.
+ */
+EXTERN(void) jpeg_abort JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo));
+
+/* Default restart-marker-resync procedure for use by data source modules */
+EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
+					    int desired));
+
+
+/* These marker codes are exported since applications and data source modules
+ * are likely to want to use them.
+ */
+
+#define JPEG_RST0	0xD0	/* RST0 marker code */
+#define JPEG_EOI	0xD9	/* EOI marker code */
+#define JPEG_APP0	0xE0	/* APP0 marker code */
+#define JPEG_COM	0xFE	/* COM marker code */
+
+
+/* Some compilers warn (or worse, error out) on structure types that are never
+ * completed.  If INCOMPLETE_TYPES_BROKEN is defined and JPEG_INTERNALS is not
+ * (so jpegint.h is not pulled in), give each substructure a dummy body.
+ */
+
+#ifdef INCOMPLETE_TYPES_BROKEN
+#ifndef JPEG_INTERNALS		/* will be defined in jpegint.h */
+struct jvirt_sarray_control { long dummy; };
+struct jvirt_barray_control { long dummy; };
+struct jpeg_comp_master { long dummy; };
+struct jpeg_c_main_controller { long dummy; };
+struct jpeg_c_prep_controller { long dummy; };
+struct jpeg_c_coef_controller { long dummy; };
+struct jpeg_marker_writer { long dummy; };
+struct jpeg_color_converter { long dummy; };
+struct jpeg_downsampler { long dummy; };
+struct jpeg_forward_dct { long dummy; };
+struct jpeg_entropy_encoder { long dummy; };
+struct jpeg_decomp_master { long dummy; };
+struct jpeg_d_main_controller { long dummy; };
+struct jpeg_d_coef_controller { long dummy; };
+struct jpeg_d_post_controller { long dummy; };
+struct jpeg_input_controller { long dummy; };
+struct jpeg_marker_reader { long dummy; };
+struct jpeg_entropy_decoder { long dummy; };
+struct jpeg_inverse_dct { long dummy; };
+struct jpeg_upsampler { long dummy; };
+struct jpeg_color_deconverter { long dummy; };
+struct jpeg_color_quantizer { long dummy; };
+#endif /* JPEG_INTERNALS */
+#endif /* INCOMPLETE_TYPES_BROKEN */
+
+
+/*
+ * The JPEG library modules define JPEG_INTERNALS before including this file.
+ * The internal structure declarations are read only when that is true.
+ * Applications using the library should not include jpegint.h, but may wish
+ * to include jerror.h.
+ */
+
+#ifdef JPEG_INTERNALS
+#include "jpegint.h"		/* fetch private declarations */
+#include "jerror.h"		/* fetch error codes too */
+#endif
+
+#ifdef __cplusplus
+#ifndef DONT_USE_EXTERN_C
+}
+#endif
+#endif
+
+#endif /* JPEGLIB_H */
diff --git a/libraries/jpeg/jquant1.c b/libraries/jpeg/jquant1.c
new file mode 100644
index 000000000..9d11f7066
--- /dev/null
+++ b/libraries/jpeg/jquant1.c
@@ -0,0 +1,857 @@
+/*
+ * jquant1.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains 1-pass color quantization (color mapping) routines.
+ * These routines provide mapping to a fixed color map using equally spaced
+ * color values.  Optional Floyd-Steinberg or ordered dithering is available.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef QUANT_1PASS_SUPPORTED
+
+
+/*
+ * The main purpose of 1-pass quantization is to provide a fast, if not very
+ * high quality, colormapped output capability.  A 2-pass quantizer usually
+ * gives better visual quality; however, for quantized grayscale output this
+ * quantizer is perfectly adequate.  Dithering is highly recommended with this
+ * quantizer, though you can turn it off if you really want to.
+ *
+ * In 1-pass quantization the colormap must be chosen in advance of seeing the
+ * image.  We use a map consisting of all combinations of Ncolors[i] color
+ * values for the i'th component.  The Ncolors[] values are chosen so that
+ * their product, the total number of colors, is no more than that requested.
+ * (In most cases, the product will be somewhat less.)
+ *
+ * Since the colormap is orthogonal, the representative value for each color
+ * component can be determined without considering the other components;
+ * then these indexes can be combined into a colormap index by a standard
+ * N-dimensional-array-subscript calculation.  Most of the arithmetic involved
+ * can be precalculated and stored in the lookup table colorindex[].
+ * colorindex[i][j] maps pixel value j in component i to the nearest
+ * representative value (grid plane) for that component; this index is
+ * multiplied by the array stride for component i, so that the
+ * index of the colormap entry closest to a given pixel value is just
+ *    sum( colorindex[component-number][pixel-component-value] )
+ * Aside from being fast, this scheme allows for variable spacing between
+ * representative values with no additional lookup cost.
+ *
+ * If gamma correction has been applied in color conversion, it might be wise
+ * to adjust the color grid spacing so that the representative colors are
+ * equidistant in linear space.  At this writing, gamma correction is not
+ * implemented by jdcolor, so nothing is done here.
+ */
+
+
+/* Declarations for ordered dithering.
+ *
+ * We use a standard 16x16 ordered dither array.  The basic concept of ordered
+ * dithering is described in many references, for instance Dale Schumacher's
+ * chapter II.2 of Graphics Gems II (James Arvo, ed. Academic Press, 1991).
+ * In place of Schumacher's comparisons against a "threshold" value, we add a
+ * "dither" value to the input pixel and then round the result to the nearest
+ * output value.  The dither value is equivalent to (0.5 - threshold) times
+ * the distance between output values.  For ordered dithering, we assume that
+ * the output colors are equally spaced; if not, results will probably be
+ * worse, since the dither may be too much or too little at a given point.
+ *
+ * The normal calculation would be to form pixel value + dither, range-limit
+ * this to 0..MAXJSAMPLE, and then index into the colorindex table as usual.
+ * We can skip the separate range-limiting step by extending the colorindex
+ * table in both directions.
+ */
+
+#define ODITHER_SIZE  16	/* dimension of dither matrix */
+/* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */
+#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE)	/* # cells in matrix */
+#define ODITHER_MASK  (ODITHER_SIZE-1) /* mask for wrapping around counters */
+
+typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE];	/* full dither table */
+typedef int (*ODITHER_MATRIX_PTR)[ODITHER_SIZE];	/* pointer to a matrix row */
+
+static const UINT8 base_dither_matrix[ODITHER_SIZE][ODITHER_SIZE] = {
+  /* Bayer's order-4 dither array, generated by the code in Stephen Hawley's
+   * article "Ordered Dithering" in Graphics Gems I.  Values must range from
+   * 0 to ODITHER_CELLS-1; make_odither_array() turns them into dither offsets.
+   */
+  {   0,192, 48,240, 12,204, 60,252,  3,195, 51,243, 15,207, 63,255 },
+  { 128, 64,176,112,140, 76,188,124,131, 67,179,115,143, 79,191,127 },
+  {  32,224, 16,208, 44,236, 28,220, 35,227, 19,211, 47,239, 31,223 },
+  { 160, 96,144, 80,172,108,156, 92,163, 99,147, 83,175,111,159, 95 },
+  {   8,200, 56,248,  4,196, 52,244, 11,203, 59,251,  7,199, 55,247 },
+  { 136, 72,184,120,132, 68,180,116,139, 75,187,123,135, 71,183,119 },
+  {  40,232, 24,216, 36,228, 20,212, 43,235, 27,219, 39,231, 23,215 },
+  { 168,104,152, 88,164,100,148, 84,171,107,155, 91,167,103,151, 87 },
+  {   2,194, 50,242, 14,206, 62,254,  1,193, 49,241, 13,205, 61,253 },
+  { 130, 66,178,114,142, 78,190,126,129, 65,177,113,141, 77,189,125 },
+  {  34,226, 18,210, 46,238, 30,222, 33,225, 17,209, 45,237, 29,221 },
+  { 162, 98,146, 82,174,110,158, 94,161, 97,145, 81,173,109,157, 93 },
+  {  10,202, 58,250,  6,198, 54,246,  9,201, 57,249,  5,197, 53,245 },
+  { 138, 74,186,122,134, 70,182,118,137, 73,185,121,133, 69,181,117 },
+  {  42,234, 26,218, 38,230, 22,214, 41,233, 25,217, 37,229, 21,213 },
+  { 170,106,154, 90,166,102,150, 86,169,105,153, 89,165,101,149, 85 }
+};
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *		...	(here)	7/16
+ *		3/16	5/16	1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array is indexed [component#][position].
+ * We provide (#columns + 2) entries per component; the extra entry at each
+ * end saves us from special-casing the first and last pixels.
+ *
+ * Note: on a wide image, we might not have enough room in a PC's near data
+ * segment to hold the error array; so it is allocated with alloc_large.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;		/* stored error; 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+#else
+typedef INT32 FSERROR;		/* stored error; may need more than 16 bits */
+typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+
+
+/* Private subobject */
+
+#define MAX_Q_COMPS 4		/* max components I can handle */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields; code casts cinfo->cquantize to my_cquantize_ptr */
+
+  /* Initially allocated colormap is saved here */
+  JSAMPARRAY sv_colormap;	/* The color map, indexed [component][entry] */
+  int sv_actual;		/* number of entries in use */
+
+  JSAMPARRAY colorindex;	/* Precomputed mapping for speed */
+  /* colorindex[i][j] = index of color closest to pixel value j in component i,
+   * premultiplied as described above.  Since colormap indexes must fit into
+   * JSAMPLEs, the entries of this array will too.
+   */
+  boolean is_padded;		/* is the colorindex padded for odither? */
+
+  int Ncolors[MAX_Q_COMPS];	/* # of values alloced to each component */
+
+  /* Variables for ordered dithering */
+  int row_index;		/* cur row's vertical index in dither matrix */
+  ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component (may be shared) */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors, output_width+2 entries each */
+  boolean on_odd_row;		/* flag to remember which row we are on */
+} my_cquantizer;
+
+typedef my_cquantizer * my_cquantize_ptr;	/* handle used throughout this file */
+
+
+/*
+ * Policy-making subroutines for create_colormap and create_colorindex.
+ * These routines determine the colormap to be used.  The rest of the module
+ * only assumes that the colormap is orthogonal.
+ *
+ *  * select_ncolors decides how to divvy up the available colors
+ *    among the components.
+ *  * output_value defines the set of representative values for a component.
+ *  * largest_input_value defines the mapping from input values to
+ *    representative values for a component.
+ * Note that the latter two routines may impose different policies for
+ * different components, though this is not currently done.
+ */
+
+
+LOCAL(int)
+select_ncolors (j_decompress_ptr cinfo, int Ncolors[])
+/* Split cinfo->desired_number_of_colors among the output components. */
+/* On return Ncolors[c] holds the number of levels given to component c; */
+/* the function result is the product of those counts (the map size). */
+{
+  static const int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
+  int ncomps = cinfo->out_color_components;
+  int budget = cinfo->desired_number_of_colors;
+  int product, root, ci, target;
+  boolean grew;
+  long trial;
+
+  /* Every component can get floor(ncomps'th root of budget) levels: search
+   * up from 2 for the first root whose power overshoots, then step back.
+   */
+  for (root = 2; ; root++) {
+    trial = root;		/* accumulate root ** ncomps */
+    for (ci = 1; ci < ncomps; ci++)
+      trial *= root;
+    if (trial > (long) budget)
+      break;
+  }
+  root--;
+  /* Fewer than 2 levels per component cannot work; trial is 2 ** ncomps. */
+  if (root < 2)
+    ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, (int) trial);
+  /* Baseline allocation: the same number of levels everywhere. */
+  product = 1;
+  for (ci = 0; ci < ncomps; ci++) {
+    Ncolors[ci] = root;
+    product *= root;
+  }
+  /* Hand out leftover budget one level at a time.  A pass stops at the first
+   * component that no longer fits, and passes repeat while one made progress,
+   * so a component may grow more than once (16 colors: 2*2*2, 3*2*2, 4*2*2).
+   * For RGB output the order tried is G, then R, then B.
+   */
+  for (;;) {
+    grew = FALSE;
+    for (ci = 0; ci < ncomps; ci++) {
+      target = (cinfo->out_color_space == JCS_RGB) ? RGB_order[ci] : ci;
+      /* product if Ncolors[target] grew by one; long arithmetic avoids oflo */
+      trial = product / Ncolors[target];
+      trial *= Ncolors[target] + 1;
+      if (trial > (long) budget)
+	break;
+      Ncolors[target]++;
+      product = (int) trial;
+      grew = TRUE;
+    }
+    if (! grew)
+      break;
+  }
+
+  return product;
+}
+
+
+LOCAL(int)
+output_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Map level j (0..maxj) of a component to its sample value.  Levels are
+ * spread evenly over 0..MAXJSAMPLE with both endpoints included, so that
+ * dithering can never produce a color outside the selected gamut.
+ */
+{
+  INT32 scaled = (INT32) j * MAXJSAMPLE;	/* j * full sample range */
+
+  scaled += maxj / 2;		/* bias so the division rounds to nearest */
+  return (int) (scaled / maxj);
+}
+
+
+LOCAL(int)
+largest_input_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Top of the input range assigned to level j: breakpoints sit halfway */
+/* between output_value() results.  Needs result(maxj) >= MAXJSAMPLE. */
+{
+  INT32 numer = (INT32) (2*j + 1) * MAXJSAMPLE + maxj;
+  return (int) (numer / (2*maxj));
+}
+
+
+/*
+ * Create the colormap.
+ */
+
+LOCAL(void)
+create_colormap (j_decompress_ptr cinfo)
+/* Build the orthogonal colormap and park it in the quantizer object. */
+{
+  my_cquantize_ptr cq = (my_cquantize_ptr) cinfo->cquantize;
+  int ncomps = cinfo->out_color_components;
+  JSAMPARRAY map;		/* map[component][colormap index] */
+  JSAMPROW row;			/* map row of the component being filled */
+  int ncolors;			/* total number of map entries */
+  int ci, level, nlevels, run, period, start, n, sample;
+
+  /* Decide how many levels each component gets. */
+  ncolors = select_ncolors(cinfo, cq->Ncolors);
+
+  /* Trace the outcome; the detailed message needs exactly 3 components. */
+  if (ncomps != 3) {
+    TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, ncolors);
+  } else {
+    TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS,
+	     ncolors, cq->Ncolors[0], cq->Ncolors[1], cq->Ncolors[2]);
+  }
+
+  /* One row of ncolors samples per component.  Entries follow row-major
+   * order of the level indexes, so the last component varies fastest.
+   */
+  map = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) ncolors, (JDIMENSION) ncomps);
+
+  /* run    = length of a stretch of equal values in the current row,
+   * period = distance between the starts of two stretches of one value.
+   */
+  period = ncolors;
+  for (ci = 0; ci < ncomps; ci++) {
+    row = map[ci];
+    nlevels = cq->Ncolors[ci];
+    run = period / nlevels;
+    for (level = 0; level < nlevels; level++) {
+      sample = output_value(cinfo, ci, level, nlevels-1);
+      /* paint every stretch that belongs to this level */
+      for (start = level * run; start < ncolors; start += period) {
+	for (n = 0; n < run; n++)
+	  row[start + n] = (JSAMPLE) sample;
+      }
+    }
+    period = run;		/* next component repeats inside one stretch */
+  }
+
+  /* Keep the result privately so that it survives changes of
+   * quantization mode.
+   */
+  cq->sv_actual = ncolors;
+  cq->sv_colormap = map;
+}
+
+
+/*
+ * Create the color index table.
+ */
+
+LOCAL(void)
+create_colorindex (j_decompress_ptr cinfo)
+/* Build colorindex[][], mapping each sample value to a premultiplied level. */
+{
+  my_cquantize_ptr cq = (my_cquantize_ptr) cinfo->cquantize;
+  int ncomps = cinfo->out_color_components;
+  JSAMPROW table;		/* colorindex row being filled */
+  int ci, in, top, nlevels, stride, level, extra;
+
+  /* Ordered dither indexes the table with sample + dither, which may lie
+   * anywhere in -MAXJSAMPLE .. 2*MAXJSAMPLE, so that mode gets MAXJSAMPLE
+   * spare entries on each side.  The choice is recorded in is_padded.
+   */
+  extra = 0;
+  cq->is_padded = FALSE;
+  if (cinfo->dither_mode == JDITHER_ORDERED) {
+    extra = MAXJSAMPLE*2;
+    cq->is_padded = TRUE;
+  }
+
+  cq->colorindex = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) (MAXJSAMPLE+1 + extra), (JDIMENSION) ncomps);
+
+  /* stride = colormap index step per level of the current component */
+  stride = cq->sv_actual;
+  for (ci = 0; ci < ncomps; ci++) {
+    nlevels = cq->Ncolors[ci];
+    stride /= nlevels;
+    /* With padding, advance the row pointer so indexes may go negative. */
+    if (extra != 0)
+      cq->colorindex[ci] += MAXJSAMPLE;
+    table = cq->colorindex[ci];
+
+    /* Sweep all sample values: level is the current output level, */
+    /* top the largest sample value that still maps to it. */
+    level = 0;
+    top = largest_input_value(cinfo, ci, 0, nlevels-1);
+    for (in = 0; in <= MAXJSAMPLE; in++) {
+      while (in > top) {
+	level++;
+	top = largest_input_value(cinfo, ci, level, nlevels-1);
+      }
+      /* store premultiplied, so the mapping loops only have to add */
+      table[in] = (JSAMPLE) (level * stride);
+    }
+
+    /* Replicate the edge entries into the padding on both sides. */
+    if (extra != 0) {
+      for (in = 1; in <= MAXJSAMPLE; in++) {
+	table[-in] = table[0];
+	table[MAXJSAMPLE+in] = table[MAXJSAMPLE];
+      }
+    }
+  }
+}
+
+
+/*
+ * Create an ordered-dither array for a component having ncolors
+ * distinct output values.
+ */
+
+LOCAL(ODITHER_MATRIX_PTR)
+make_odither_array (j_decompress_ptr cinfo, int ncolors)
+/* Allocate and fill a dither matrix scaled for ncolors output levels. */
+{
+  ODITHER_MATRIX_PTR matrix;
+  int row, col;
+  INT32 numer, denom;
+
+  matrix = (ODITHER_MATRIX_PTR)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(ODITHER_MATRIX));
+  /* Adjacent levels lie MAXJSAMPLE/(ncolors-1) apart, so the cell with fill
+   * order f (of N) gets (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1), in INT32.
+   */
+  denom = 2 * ODITHER_CELLS * ((INT32) (ncolors - 1));
+  for (row = 0; row < ODITHER_SIZE; row++) {
+    for (col = 0; col < ODITHER_SIZE; col++) {
+      numer = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[row][col])))
+	      * MAXJSAMPLE;
+      /* divide magnitudes so the quotient truncates toward zero everywhere */
+      if (numer < 0)
+	matrix[row][col] = (int) -((-numer)/denom);
+      else
+	matrix[row][col] = (int) (numer/denom);
+    }
+  }
+  return matrix;
+}
+
+
+/*
+ * Create the ordered-dither tables.
+ * Components having the same number of representative colors may 
+ * share a dither table.
+ */
+
+LOCAL(void)
+create_odither_tables (j_decompress_ptr cinfo)
+/* Give every component an ordered-dither table; components with equal */
+/* level counts share a single table. */
+{
+  my_cquantize_ptr cq = (my_cquantize_ptr) cinfo->cquantize;
+  ODITHER_MATRIX_PTR table;
+  int ci, prev, nlevels;
+
+  for (ci = 0; ci < cinfo->out_color_components; ci++) {
+    nlevels = cq->Ncolors[ci];
+    /* look for the first earlier component with the same level count */
+    prev = 0;
+    while (prev < ci && cq->Ncolors[prev] != nlevels)
+      prev++;
+    table = (prev < ci) ? cq->odither[prev] : NULL;
+    if (table == NULL)		/* nothing to share: build a fresh table */
+      table = make_odither_array(cinfo, nlevels);
+    cq->odither[ci] = table;
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		JSAMPARRAY output_buf, int num_rows)
+/* Undithered mapping for any component count: each output sample is the */
+/* sum of the colorindex entries of the pixel's interleaved input samples. */
+{
+  my_cquantize_ptr cq = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPARRAY lut = cq->colorindex;
+  JDIMENSION ncols = cinfo->output_width;
+  int ncomps = cinfo->out_color_components;
+  register JSAMPROW src, dst;
+  register int code, ci;
+  JDIMENSION x;
+  int y;
+
+  for (y = 0; y < num_rows; y++) {
+    src = input_buf[y];
+    dst = output_buf[y];
+    for (x = 0; x < ncols; x++) {
+      code = 0;
+      for (ci = 0; ci < ncomps; ci++, src++)
+	code += GETJSAMPLE(lut[ci][GETJSAMPLE(*src)]);
+      dst[x] = (JSAMPLE) code;
+    }
+  }
+}
+
+
+METHODDEF(void)
+color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		 JSAMPARRAY output_buf, int num_rows)
+/* Undithered mapping for exactly three components (3 samples per pixel). */
+{
+  my_cquantize_ptr cq = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPROW lut0 = cq->colorindex[0];
+  JSAMPROW lut1 = cq->colorindex[1];
+  JSAMPROW lut2 = cq->colorindex[2];
+  JDIMENSION ncols = cinfo->output_width;
+  register JSAMPROW src, dst;
+  register int code;
+  JDIMENSION x;
+  int y;
+
+  for (y = 0; y < num_rows; y++) {
+    src = input_buf[y];
+    dst = output_buf[y];
+    for (x = 0; x < ncols; x++, src += 3) {
+      code = GETJSAMPLE(lut0[GETJSAMPLE(src[0])])
+	   + GETJSAMPLE(lut1[GETJSAMPLE(src[1])])
+	   + GETJSAMPLE(lut2[GETJSAMPLE(src[2])]);
+      dst[x] = (JSAMPLE) code;
+    }
+  }
+}
+
+
+METHODDEF(void)
+quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		     JSAMPARRAY output_buf, int num_rows)
+/* Ordered dithering for any component count.  Each output row is cleared
+ * and then built up by one pass per component; the quantizer's row_index
+ * field carries the dither-matrix row from one call to the next.
+ */
+{
+  my_cquantize_ptr cq = (my_cquantize_ptr) cinfo->cquantize;
+  JDIMENSION ncols = cinfo->output_width;
+  int ncomps = cinfo->out_color_components;
+  register JSAMPROW src;
+  register JSAMPROW dst;
+  JSAMPROW lut;			/* colorindex row of the current component */
+  int * offsets;		/* dither matrix row in use */
+  int mrow;			/* vertical position in the dither matrix */
+  JDIMENSION x;
+  int ci;
+  int y;
+
+  for (y = 0; y < num_rows; y++) {
+    mrow = cq->row_index;
+    /* start from zero so the components can be accumulated one by one */
+    FMEMZERO((void FAR *) output_buf[y],
+	     (size_t) (ncols * SIZEOF(JSAMPLE)));
+
+    /* one pass over the row per component */
+    for (ci = 0; ci < ncomps; ci++) {
+      lut = cq->colorindex[ci];
+      offsets = cq->odither[ci][mrow];
+      src = input_buf[y] + ci;
+      dst = output_buf[y];
+
+      /* The sum sample + dither may fall outside 0..MAXJSAMPLE (dither is
+       * at most +- MAXJSAMPLE).  No clamping is needed: the padded
+       * colorindex row answers correctly for such indexes.  The matrix
+       * column is the pixel column wrapped to ODITHER_SIZE.
+       */
+      for (x = 0; x < ncols; x++) {
+	dst[x] += lut[GETJSAMPLE(*src) + offsets[x & ODITHER_MASK]];
+	src += ncomps;
+      }
+    }
+
+    /* step to the next matrix row, wrapping around at the bottom */
+    cq->row_index = (mrow + 1) & ODITHER_MASK;
+  }
+}
+
+
+METHODDEF(void)
+quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		      JSAMPARRAY output_buf, int num_rows)
+/* Ordered dithering specialized for exactly three components: each output
+ * sample is written once, with the sum of all three dithered lookups.
+ */
+{
+  my_cquantize_ptr cq = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPROW lut0 = cq->colorindex[0];
+  JSAMPROW lut1 = cq->colorindex[1];
+  JSAMPROW lut2 = cq->colorindex[2];
+  JDIMENSION ncols = cinfo->output_width;
+  register JSAMPROW src;
+  register JSAMPROW dst;
+  register int code;
+  int * offs0;			/* active dither matrix row, component 0 */
+  int * offs1;			/* same for component 1 */
+  int * offs2;			/* same for component 2 */
+  int mrow, mcol;		/* position inside the dither matrix */
+  JDIMENSION x;
+  int y;
+
+  for (y = 0; y < num_rows; y++) {
+    mrow = cq->row_index;
+    offs0 = cq->odither[0][mrow];
+    offs1 = cq->odither[1][mrow];
+    offs2 = cq->odither[2][mrow];
+    src = input_buf[y];
+    dst = output_buf[y];
+
+    for (x = 0; x < ncols; x++, src += 3) {
+      mcol = (int) (x & ODITHER_MASK);	/* column wrapped to the matrix */
+      /* padded colorindex rows absorb out-of-range sample + dither sums */
+      code = GETJSAMPLE(lut0[GETJSAMPLE(src[0]) + offs0[mcol]])
+	   + GETJSAMPLE(lut1[GETJSAMPLE(src[1]) + offs1[mcol]])
+	   + GETJSAMPLE(lut2[GETJSAMPLE(src[2]) + offs2[mcol]]);
+      dst[x] = (JSAMPLE) code;
+    }
+
+    /* the next row continues with the following matrix row */
+    cq->row_index = (mrow + 1) & ODITHER_MASK;
+  }
+}
+
+
+METHODDEF(void)
+quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		    JSAMPARRAY output_buf, int num_rows)
+/* Floyd-Steinberg dithering, any component count; one pass per component */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register LOCFSERROR cur;	/* current error or pixel value */
+  LOCFSERROR belowerr;		/* error for pixel below cur */
+  LOCFSERROR bpreverr;		/* error for below/prev col */
+  LOCFSERROR bnexterr;		/* error for below/next col */
+  LOCFSERROR delta;		/* 2 * error, used to step cur through 3x, 5x, 7x */
+  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex_ci;	/* colorindex row of the current component */
+  JSAMPROW colormap_ci;		/* saved colormap row of the current component */
+  int pixcode;			/* this component's contribution to the output index */
+  int nc = cinfo->out_color_components;
+  int dir;			/* 1 for left-to-right, -1 for right-to-left */
+  int dirnc;			/* dir * nc */
+  int ci;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  SHIFT_TEMPS			/* presumably scratch needed by RIGHT_SHIFT; defined elsewhere */
+
+  for (row = 0; row < num_rows; row++) {
+    /* Initialize output values to 0 so can process components separately */
+    FMEMZERO((void FAR *) output_buf[row],
+	     (size_t) (width * SIZEOF(JSAMPLE)));
+    for (ci = 0; ci < nc; ci++) {
+      input_ptr = input_buf[row] + ci;
+      output_ptr = output_buf[row];
+      if (cquantize->on_odd_row) {
+	/* work right to left in this row */
+	input_ptr += (width-1) * nc; /* so point to rightmost pixel */
+	output_ptr += width-1;
+	dir = -1;
+	dirnc = -nc;
+	errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */
+      } else {
+	/* work left to right in this row */
+	dir = 1;
+	dirnc = nc;
+	errorptr = cquantize->fserrors[ci]; /* => entry before first column */
+      }
+      colorindex_ci = cquantize->colorindex[ci];
+      colormap_ci = cquantize->sv_colormap[ci];
+      /* Preset error values: no error propagated to first pixel from left */
+      cur = 0;
+      /* and no error propagated to row below yet */
+      belowerr = bpreverr = 0;
+
+      for (col = width; col > 0; col--) {
+	/* cur holds the error propagated from the previous pixel on the
+	 * current line.  Add the error propagated from the previous line
+	 * to form the complete error correction term for this pixel, and
+	 * round the error term (which is expressed * 16) to an integer.
+	 * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+	 * for either sign of the error value.
+	 * Note: errorptr points to *previous* column's array entry.
+	 */
+	cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
+	/* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+	 * The maximum error is +- MAXJSAMPLE; this sets the required size
+	 * of the range_limit array.
+	 */
+	cur += GETJSAMPLE(*input_ptr);
+	cur = GETJSAMPLE(range_limit[cur]);
+	/* Select output value, accumulate into output code for this pixel */
+	pixcode = GETJSAMPLE(colorindex_ci[cur]);
+	*output_ptr += (JSAMPLE) pixcode;
+	/* Compute actual representation error at this pixel */
+	/* Note: we can do this even though we don't have the final */
+	/* pixel code, because the colormap is orthogonal. */
+	cur -= GETJSAMPLE(colormap_ci[pixcode]);
+	/* Compute error fractions to be propagated to adjacent pixels.
+	 * Add these into the running sums, and simultaneously shift the
+	 * next-line error sums left by 1 column.
+	 */
+	bnexterr = cur;		/* 1/16 share, for the below/next pixel */
+	delta = cur * 2;
+	cur += delta;		/* form error * 3 */
+	errorptr[0] = (FSERROR) (bpreverr + cur);
+	cur += delta;		/* form error * 5 */
+	bpreverr = belowerr + cur;
+	belowerr = bnexterr;
+	cur += delta;		/* form error * 7 */
+	/* At this point cur contains the 7/16 error value to be propagated
+	 * to the next pixel on the current line, and all the errors for the
+	 * next line have been shifted over. We are therefore ready to move on.
+	 */
+	input_ptr += dirnc;	/* advance input ptr to next column */
+	output_ptr += dir;	/* advance output ptr to next column */
+	errorptr += dir;	/* advance errorptr to current column */
+      }
+      /* Post-loop cleanup: we must unload the final error value into the
+       * final fserrors[] entry.  Note we need not unload belowerr because
+       * it is for the dummy column before or after the actual array.
+       */
+      errorptr[0] = (FSERROR) bpreverr; /* unload prev err into array */
+    }
+    cquantize->on_odd_row = (cquantize->on_odd_row ? FALSE : TRUE); /* reverse direction for next row */
+  }
+}
+
+
+/*
+ * Allocate one (output_width + 2)-entry F-S error row per output component.
+ */
+
+LOCAL(void)
+alloc_fs_workspace (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cq = (my_cquantize_ptr) cinfo->cquantize;
+  size_t row_bytes = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
+  int comp = 0;
+
+  while (comp < cinfo->out_color_components) {
+    cq->fserrors[comp] = (FSERRPTR) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, row_bytes);
+    comp++;
+  }
+}
+
+
+/* Initialize for one-pass color quantization: publish the saved colormap
+ * and select the color_quantize method for the current cinfo->dither_mode.
+ * The is_pre_scan argument is not examined by this routine. */
+
+METHODDEF(void)
+start_pass_1_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  size_t arraysize;		/* byte size of one F-S error row */
+  int i;
+
+  /* Install my colormap and its entry count as the current output map. */
+  cinfo->colormap = cquantize->sv_colormap;
+  cinfo->actual_number_of_colors = cquantize->sv_actual;
+
+  /* Pick the quantize method and reset per-pass state for the dither mode. */
+  switch (cinfo->dither_mode) {
+  case JDITHER_NONE:
+    if (cinfo->out_color_components == 3) /* dedicated 3-component routine */
+      cquantize->pub.color_quantize = color_quantize3;
+    else
+      cquantize->pub.color_quantize = color_quantize;
+    break;
+  case JDITHER_ORDERED:
+    if (cinfo->out_color_components == 3) /* dedicated 3-component routine */
+      cquantize->pub.color_quantize = quantize3_ord_dither;
+    else
+      cquantize->pub.color_quantize = quantize_ord_dither;
+    cquantize->row_index = 0;	/* restart ordered-dither row state at zero */
+    /* If user changed to ordered dither from another mode,
+     * we must recreate the color index table with padding.
+     * This will cost extra space, but probably isn't very likely.
+     */
+    if (! cquantize->is_padded)
+      create_colorindex(cinfo);
+    /* Create ordered-dither tables unless an earlier pass already did. */
+    if (cquantize->odither[0] == NULL)
+      create_odither_tables(cinfo);
+    break;
+  case JDITHER_FS:
+    cquantize->pub.color_quantize = quantize_fs_dither;
+    cquantize->on_odd_row = FALSE; /* F-S state: first row counts as even */
+    /* Allocate Floyd-Steinberg workspace unless it already exists. */
+    if (cquantize->fserrors[0] == NULL)
+      alloc_fs_workspace(cinfo);
+    /* Zero every component's error row; same size as was allocated. */
+    arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
+    for (i = 0; i < cinfo->out_color_components; i++)
+      FMEMZERO((void FAR *) cquantize->fserrors[i], arraysize);
+    break;
+  default:			/* any other dither_mode value is rejected */
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    break;
+  }
+}
+
+
+/*
+ * Finish up at the end of the pass; the cinfo argument is not used.
+ */
+
+METHODDEF(void)
+finish_pass_1_quant (j_decompress_ptr cinfo)
+{
+  /* Intentionally empty: there is no end-of-pass work in the 1-pass case. */
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ * This module has no support for that: it always raises JERR_MODE_CHANGE.
+ */
+
+METHODDEF(void)
+new_color_map_1_quant (j_decompress_ptr cinfo)
+{
+  ERREXIT(cinfo, JERR_MODE_CHANGE);	/* reject the request unconditionally */
+}
+
+
+/*
+ * Module initialization: create the 1-pass quantizer and attach it to cinfo.
+ */
+
+GLOBAL(void)
+jinit_1pass_quantizer (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize;
+
+  cquantize = (my_cquantize_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_cquantizer));
+  cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
+  cquantize->pub.start_pass = start_pass_1_quant;
+  cquantize->pub.finish_pass = finish_pass_1_quant;
+  cquantize->pub.new_color_map = new_color_map_1_quant;
+  cquantize->fserrors[0] = NULL; /* NULL marks FS workspace as unallocated */
+  cquantize->odither[0] = NULL;	/* NULL marks odither tables as unallocated */
+
+  /* Refuse more components than my per-component arrays can hold */
+  if (cinfo->out_color_components > MAX_Q_COMPS)
+    ERREXIT1(cinfo, JERR_QUANT_COMPONENTS, MAX_Q_COMPS);
+  /* Make sure colormap indexes can be represented by JSAMPLEs */
+  if (cinfo->desired_number_of_colors > (MAXJSAMPLE+1))
+    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXJSAMPLE+1);
+
+  /* Create the colormap first, then the color index table. */
+  create_colormap(cinfo);
+  create_colorindex(cinfo);
+
+  /* Allocate Floyd-Steinberg workspace now if FS dithering is selected.
+   * We do this now since it is FAR storage and may affect the memory
+   * manager's space calculations.  If the user changes to FS dither
+   * mode in a later pass, start_pass_1_quant allocates the space then,
+   * possibly overrunning the max_memory_to_use setting.
+   */
+  if (cinfo->dither_mode == JDITHER_FS)
+    alloc_fs_workspace(cinfo);
+}
+
+#endif /* QUANT_1PASS_SUPPORTED */
diff --git a/libraries/jpeg/jquant2.c b/libraries/jpeg/jquant2.c
new file mode 100644
index 000000000..38fc2af7a
--- /dev/null
+++ b/libraries/jpeg/jquant2.c
@@ -0,0 +1,1311 @@
+/*
+ * jquant2.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains 2-pass color quantization (color mapping) routines.
+ * These routines provide selection of a custom color map for an image,
+ * followed by mapping of the image to that color map, with optional
+ * Floyd-Steinberg dithering.
+ * It is also possible to use just the second pass to map to an arbitrary
+ * externally-given color map.
+ *
+ * Note: ordered dithering is not supported, since there isn't any fast
+ * way to compute intercolor distances; it's unclear that ordered dither's
+ * fundamental assumptions even hold with an irregularly spaced color map.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+
+/*
+ * This module implements the well-known Heckbert paradigm for color
+ * quantization.  Most of the ideas used here can be traced back to
+ * Heckbert's seminal paper
+ *   Heckbert, Paul.  "Color Image Quantization for Frame Buffer Display",
+ *   Proc. SIGGRAPH '82, Computer Graphics v.16 #3 (July 1982), pp 297-304.
+ *
+ * In the first pass over the image, we accumulate a histogram showing the
+ * usage count of each possible color.  To keep the histogram to a reasonable
+ * size, we reduce the precision of the input; typical practice is to retain
+ * 5 or 6 bits per color, so that 8 or 4 different input values are counted
+ * in the same histogram cell.
+ *
+ * Next, the color-selection step begins with a box representing the whole
+ * color space, and repeatedly splits the "largest" remaining box until we
+ * have as many boxes as desired colors.  Then the mean color in each
+ * remaining box becomes one of the possible output colors.
+ * 
+ * The second pass over the image maps each input pixel to the closest output
+ * color (optionally after applying a Floyd-Steinberg dithering correction).
+ * This mapping is logically trivial, but making it go fast enough requires
+ * considerable care.
+ *
+ * Heckbert-style quantizers vary a good deal in their policies for choosing
+ * the "largest" box and deciding where to cut it.  The particular policies
+ * used here have proved out well in experimental comparisons, but better ones
+ * may yet be found.
+ *
+ * In earlier versions of the IJG code, this module quantized in YCbCr color
+ * space, processing the raw upsampled data without a color conversion step.
+ * This allowed the color conversion math to be done only once per colormap
+ * entry, not once per pixel.  However, that optimization precluded other
+ * useful optimizations (such as merging color conversion with upsampling)
+ * and it also interfered with desired capabilities such as quantizing to an
+ * externally-supplied colormap.  We have therefore abandoned that approach.
+ * The present code works in the post-conversion color space, typically RGB.
+ *
+ * To improve the visual quality of the results, we actually work in scaled
+ * RGB space, giving G distances more weight than R, and R in turn more than
+ * B.  To do everything in integer math, we must use integer scale factors.
+ * The 2/3/1 scale factors used here correspond loosely to the relative
+ * weights of the colors in the NTSC grayscale equation.
+ * If you want to use this code to quantize a non-RGB color space, you'll
+ * probably need to change these scale factors.
+ */
+
+#define R_SCALE 2		/* weight applied to red-axis distances */
+#define G_SCALE 3		/* weight applied to green-axis distances */
+#define B_SCALE 1		/* weight applied to blue-axis distances */
+
+/* Relabel R/G/B as components 0/1/2, respecting the RGB ordering defined
+ * in jmorecfg.h.  Only the R,G,B and B,G,R orders are covered: green must be
+ * component 1, while red and blue may take components 0 and 2 either way.
+ * With any other order some Cn_SCALE stays undefined and later uses fail to
+ * compile until this logic is extended (and the histogram sizes revisited).
+ */
+
+#if RGB_RED == 0		/* R,G,B order: component 0 is red */
+#define C0_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 0		/* B,G,R order: component 0 is blue */
+#define C0_SCALE B_SCALE
+#endif
+#if RGB_GREEN == 1		/* green is the middle component */
+#define C1_SCALE G_SCALE
+#endif
+#if RGB_RED == 2		/* B,G,R order: component 2 is red */
+#define C2_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 2		/* R,G,B order: component 2 is blue */
+#define C2_SCALE B_SCALE
+#endif
+
+
+/*
+ * First we have the histogram data structure and routines for creating it.
+ *
+ * The number of bits of precision can be adjusted by changing these symbols.
+ * We recommend keeping 6 bits for G and 5 each for R and B.
+ * If you have plenty of memory and cycles, 6 bits all around gives marginally
+ * better results; if you are short of memory, 5 bits all around will save
+ * some space but degrade the results.
+ * To maintain a fully accurate histogram, we'd need to allocate a "long"
+ * (preferably unsigned long) for each cell.  In practice this is overkill;
+ * we can get by with 16 bits per cell.  Few of the cell counts will overflow,
+ * and clamping those that do overflow to the maximum value will give close-
+ * enough results.  This reduces the recommended histogram size from 256Kb
+ * to 128Kb, which is a useful savings on PC-class machines.
+ * (In the second pass the histogram space is re-used for pixel mapping data;
+ * in that capacity, each cell must be able to store zero to the number of
+ * desired colors.  16 bits/cell is plenty for that too.)
+ * Since the JPEG code is intended to run in small memory model on 80x86
+ * machines, we can't just allocate the histogram in one chunk.  Instead
+ * of a true 3-D array, we use a row of pointers to 2-D arrays.  Each
+ * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and
+ * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.  Note that
+ * on 80x86 machines, the pointer row is in near memory but the actual
+ * arrays are in far memory (same arrangement as we use for image arrays).
+ */
+
+#define MAXNUMCOLORS  (MAXJSAMPLE+1) /* largest colormap this module handles */
+
+/* Histogram precision per component.  These suit either the R,G,B or the
+ * B,G,R color order, but you may not like the results for other orders.
+ */
+#define HIST_C0_BITS  5		/* bits of precision in R/B histogram */
+#define HIST_C1_BITS  6		/* bits of precision in G histogram */
+#define HIST_C2_BITS  5		/* bits of precision in B/R histogram */
+
+/* Number of elements along histogram axes (2 to the power of the bits). */
+#define HIST_C0_ELEMS  (1<<HIST_C0_BITS)
+#define HIST_C1_ELEMS  (1<<HIST_C1_BITS)
+#define HIST_C2_ELEMS  (1<<HIST_C2_BITS)
+
+/* Right-shift counts that turn an input sample into a histogram index. */
+#define C0_SHIFT  (BITS_IN_JSAMPLE-HIST_C0_BITS)
+#define C1_SHIFT  (BITS_IN_JSAMPLE-HIST_C1_BITS)
+#define C2_SHIFT  (BITS_IN_JSAMPLE-HIST_C2_BITS)
+
+
+typedef UINT16 histcell;	/* one histogram cell; prefer an unsigned type */
+
+typedef histcell FAR * histptr;	/* pointer to an individual histogram cell */
+
+typedef histcell hist1d[HIST_C2_ELEMS]; /* one row of cells along axis 2 */
+typedef hist1d FAR * hist2d;	/* 2nd level: pointer to an array of rows */
+typedef hist2d * hist3d;	/* top level: pointer to the 2nd-level pointers */
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *		...	(here)	7/16
+ *		3/16	5/16	1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array has (#columns + 2) entries; the extra entry at
+ * each end saves us from special-casing the first and last pixels.
+ * Each entry is three values long, one value for each color component.
+ *
+ * Note: on a wide image, we might not have enough room in a PC's near data
+ * segment to hold the error array; so it is allocated with alloc_large.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;		/* stored error; 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+#else
+typedef INT32 FSERROR;		/* stored error; may need more than 16 bits */
+typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+
+
+/* Private subobject; reached by casting cinfo->cquantize to my_cquantize_ptr */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields; kept as first member */
+
+  /* Space for the eventually created colormap is stashed here */
+  JSAMPARRAY sv_colormap;	/* colormap allocated at init time */
+  int desired;			/* desired # of colors = size of colormap */
+
+  /* Variables for accumulating image statistics */
+  hist3d histogram;		/* top-level pointer to the histogram */
+
+  boolean needs_zeroed;		/* TRUE if next pass must zero histogram */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors;		/* accumulated errors */
+  boolean on_odd_row;		/* flag to remember which row we are on */
+  int * error_limiter;		/* table for clamping the applied error */
+} my_cquantizer;
+
+typedef my_cquantizer * my_cquantize_ptr;
+
+
+/*
+ * Prescan some rows of pixels.
+ * Each pixel is reduced to histogram precision and tallied in the
+ * histogram cell it falls into; a cell whose count would overflow is
+ * left pinned at its maximum instead.
+ * output_buf is part of the method signature only and is never touched
+ * here, so nothing is output by this routine.
+ */
+
+METHODDEF(void)
+prescan_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register hist3d hist = cquantize->histogram;
+  register JSAMPROW pixel;
+  register histptr cell;
+  JDIMENSION ncols = cinfo->output_width;
+  JDIMENSION remaining;
+  int y = 0;
+
+  while (y < num_rows) {
+    pixel = input_buf[y++];
+    for (remaining = ncols; remaining > 0; remaining--, pixel += 3) {
+      /* locate the histogram cell addressed by this pixel's components */
+      int i0 = GETJSAMPLE(pixel[0]) >> C0_SHIFT;
+      int i1 = GETJSAMPLE(pixel[1]) >> C1_SHIFT;
+      int i2 = GETJSAMPLE(pixel[2]) >> C2_SHIFT;
+      cell = & hist[i0][i1][i2];
+      /* bump the count; if it wrapped around, step back to the maximum */
+      if (++(*cell) <= 0)
+	(*cell)--;
+    }
+  }
+}
+
+
+/*
+ * Next we have the really interesting routines: selection of a colormap
+ * given the completed histogram.
+ * These routines work with a list of "boxes", each representing a rectangular
+ * subset of the input color space (to histogram precision).
+ */
+
+typedef struct {
+  /* Inclusive lower/upper bounds per axis, in histogram-index units */
+  int c0min, c0max;
+  int c1min, c1max;
+  int c2min, c2max;
+  /* Box size measure (a 2-norm, not a true volume); splittable iff > 0 */
+  INT32 volume;
+  /* The number of nonzero histogram cells within this box */
+  long colorcount;
+} box;
+
+typedef box * boxptr;
+
+
+LOCAL(boxptr)
+find_biggest_color_pop (boxptr boxlist, int numboxes)
+/* Pick the box holding the most distinct colors among those with nonzero */
+/* volume; the earliest such box wins ties.  NULL if no box qualifies. */
+{
+  boxptr best = NULL;
+  long best_count = 0;
+  int idx;
+
+  for (idx = 0; idx < numboxes; idx++) {
+    boxptr cand = &boxlist[idx];
+    if (cand->volume <= 0 || cand->colorcount <= best_count)
+      continue;			/* unsplittable, or no better than current */
+    best = cand;
+    best_count = cand->colorcount;
+  }
+  return best;
+}
+
+
+LOCAL(boxptr)
+find_biggest_volume (boxptr boxlist, int numboxes)
+/* Pick the box with the greatest (scaled) volume; earliest wins ties. */
+/* NULL is returned when no box has a volume above zero. */
+{
+  boxptr best = NULL;
+  boxptr cur = boxlist;
+  INT32 best_volume = 0;
+  int remaining;
+
+  for (remaining = numboxes; remaining > 0; remaining--, cur++) {
+    if (cur->volume > best_volume) {
+      best = cur;
+      best_volume = cur->volume;
+    }
+  }
+  return best;
+}
+
+
+LOCAL(void)
+update_box (j_decompress_ptr cinfo, boxptr boxp)
+/* Shrink each bound of *boxp inward to the nearest nonzero histogram cell, */
+/* then store the box's 2-norm "volume" and its count of nonzero cells */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  histptr histp;
+  int c0,c1,c2;
+  int c0min,c0max,c1min,c1max,c2min,c2max;
+  INT32 dist0,dist1,dist2;
+  long ccount;
+  
+  c0min = boxp->c0min;  c0max = boxp->c0max;
+  c1min = boxp->c1min;  c1max = boxp->c1max;
+  c2min = boxp->c2min;  c2max = boxp->c2max;
+  
+  if (c0max > c0min)		/* axis 0 low side; skipped if one cell wide */
+    for (c0 = c0min; c0 <= c0max; c0++)
+      for (c1 = c1min; c1 <= c1max; c1++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {	/* first occupied c0 plane from below */
+	    boxp->c0min = c0min = c0;
+	    goto have_c0min;
+	  }
+      }
+ have_c0min:			/* bound is left unchanged if nothing was found */
+  if (c0max > c0min)		/* axis 0 high side, scanning downward */
+    for (c0 = c0max; c0 >= c0min; c0--)
+      for (c1 = c1min; c1 <= c1max; c1++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {	/* first occupied c0 plane from above */
+	    boxp->c0max = c0max = c0;
+	    goto have_c0max;
+	  }
+      }
+ have_c0max:
+  if (c1max > c1min)		/* axis 1 low side, using the tightened c0 range */
+    for (c1 = c1min; c1 <= c1max; c1++)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {	/* first occupied c1 plane from below */
+	    boxp->c1min = c1min = c1;
+	    goto have_c1min;
+	  }
+      }
+ have_c1min:
+  if (c1max > c1min)		/* axis 1 high side, scanning downward */
+    for (c1 = c1max; c1 >= c1min; c1--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {	/* first occupied c1 plane from above */
+	    boxp->c1max = c1max = c1;
+	    goto have_c1max;
+	  }
+      }
+ have_c1max:
+  if (c2max > c2min)		/* axis 2 low side */
+    for (c2 = c2min; c2 <= c2max; c2++)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1min][c2];
+	/* stepping by HIST_C2_ELEMS reaches the same c2 in the next c1 row */
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+	  if (*histp != 0) {	/* first occupied c2 plane from below */
+	    boxp->c2min = c2min = c2;
+	    goto have_c2min;
+	  }
+      }
+ have_c2min:
+  if (c2max > c2min)		/* axis 2 high side, scanning downward */
+    for (c2 = c2max; c2 >= c2min; c2--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1min][c2];
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+	  if (*histp != 0) {	/* first occupied c2 plane from above */
+	    boxp->c2max = c2max = c2;
+	    goto have_c2max;
+	  }
+      }
+ have_c2max:
+
+  /* Update box volume.
+   * We use 2-norm rather than real volume here; this biases the method
+   * against making long narrow boxes, and it has the side benefit that
+   * a box is splittable iff norm > 0.
+   * Since the differences are expressed in histogram-cell units,
+   * we shift them back up to JSAMPLE units to get consistent distances,
+   * and then scale each axis by its Cn_SCALE distance weight.
+   */
+  dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
+  dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
+  dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
+  boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2;
+  
+  /* Count the nonzero cells left inside the tightened bounds */
+  ccount = 0;
+  for (c0 = c0min; c0 <= c0max; c0++)
+    for (c1 = c1min; c1 <= c1max; c1++) {
+      histp = & histogram[c0][c1][c2min];
+      for (c2 = c2min; c2 <= c2max; c2++, histp++)
+	if (*histp != 0) {
+	  ccount++;
+	}
+    }
+  boxp->colorcount = ccount;	/* distinct occupied cells, not pixel total */
+}
+
+
+LOCAL(int)
+median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
+	    int desired_colors)
+/* Split boxes until desired_colors exist or none is splittable; returns count */
+{
+  int n,lb;			/* n: axis to split on; lb: top index of lower box */
+  int c0,c1,c2,cmax;		/* scaled axis lengths and the running maximum */
+  register boxptr b1,b2;	/* b1: box being split; b2: new upper half */
+
+  while (numboxes < desired_colors) {
+    /* Select box to split.
+     * Current algorithm: by population for first half, then by volume.
+     */
+    if (numboxes*2 <= desired_colors) {
+      b1 = find_biggest_color_pop(boxlist, numboxes);
+    } else {
+      b1 = find_biggest_volume(boxlist, numboxes);
+    }
+    if (b1 == NULL)		/* no splittable boxes left! */
+      break;
+    b2 = &boxlist[numboxes];	/* next free slot; presumes caller sized list */
+    /* Copy the color bounds to the new box. */
+    b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max;
+    b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min;
+    /* Choose which axis to split the box on.
+     * Current algorithm: longest scaled axis.
+     * See notes in update_box about scaling distances.
+     */
+    c0 = ((b1->c0max - b1->c0min) << C0_SHIFT) * C0_SCALE;
+    c1 = ((b1->c1max - b1->c1min) << C1_SHIFT) * C1_SCALE;
+    c2 = ((b1->c2max - b1->c2min) << C2_SHIFT) * C2_SCALE;
+    /* We want to break any ties in favor of green, then red, blue last.
+     * This code does the right thing for R,G,B or B,G,R color orders only.
+     */
+#if RGB_RED == 0
+    cmax = c1; n = 1;		/* start from green (axis 1) */
+    if (c0 > cmax) { cmax = c0; n = 0; } /* red only if strictly longer */
+    if (c2 > cmax) { n = 2; }	/* blue only if longer than both */
+#else
+    cmax = c1; n = 1;		/* start from green (axis 1) */
+    if (c2 > cmax) { cmax = c2; n = 2; } /* red only if strictly longer */
+    if (c0 > cmax) { n = 0; }	/* blue only if longer than both */
+#endif
+    /* Choose split point along selected axis, and update box bounds.
+     * Current algorithm: split at halfway point.
+     * (Since the box has been shrunk to minimum volume,
+     * any split will produce two nonempty subboxes.)
+     * Note that lb value is max for lower box, so must be < old max.
+     */
+    switch (n) {
+    case 0:
+      lb = (b1->c0max + b1->c0min) / 2;
+      b1->c0max = lb;		/* b1 keeps the lower half */
+      b2->c0min = lb+1;		/* b2 takes the upper half */
+      break;
+    case 1:
+      lb = (b1->c1max + b1->c1min) / 2;
+      b1->c1max = lb;
+      b2->c1min = lb+1;
+      break;
+    case 2:
+      lb = (b1->c2max + b1->c2min) / 2;
+      b1->c2max = lb;
+      b2->c2min = lb+1;
+      break;
+    }
+    /* Re-shrink both halves and refresh their volume and color count */
+    update_box(cinfo, b1);
+    update_box(cinfo, b2);
+    numboxes++;
+  }
+  return numboxes;
+}
+
+
+LOCAL(void)
+compute_color (j_decompress_ptr cinfo, boxptr boxp, int icolor)
+/* Compute representative color for a box, put it in colormap[icolor] */
+{
+  /* Result: mean of the box's cell centers, each weighted by its pixel */
+  /* count, with half the divisor added beforehand so the division rounds. */
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  long npixels = 0;		/* pixels tallied in the box */
+  long sum0 = 0;		/* weighted coordinate sums, one per axis */
+  long sum1 = 0;
+  long sum2 = 0;
+  long weight;
+  int i0, i1, i2;
+  int mid0, mid1, mid2;		/* cell-center coordinates in sample units */
+
+  for (i0 = boxp->c0min; i0 <= boxp->c0max; i0++) {
+    mid0 = (i0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1);
+    for (i1 = boxp->c1min; i1 <= boxp->c1max; i1++) {
+      mid1 = (i1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1);
+      for (i2 = boxp->c2min; i2 <= boxp->c2max; i2++) {
+        weight = histogram[i0][i1][i2];
+        if (weight == 0)
+          continue;		/* empty cell contributes nothing */
+        mid2 = (i2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1);
+        npixels += weight;
+        sum0 += mid0 * weight;
+        sum1 += mid1 * weight;
+        sum2 += mid2 * weight;
+      }
+    }
+  }
+
+  /* Divide each sum by the pixel total, rounding to nearest */
+  cinfo->colormap[0][icolor] = (JSAMPLE) ((sum0 + (npixels>>1)) / npixels);
+  cinfo->colormap[1][icolor] = (JSAMPLE) ((sum1 + (npixels>>1)) / npixels);
+  cinfo->colormap[2][icolor] = (JSAMPLE) ((sum2 + (npixels>>1)) / npixels);
+}
+
+
+LOCAL(void)
+select_colors (j_decompress_ptr cinfo, int desired_colors)
+/* Top-level color selection: build the box list, then fill the colormap */
+{
+  boxptr boxes, root;
+  int nboxes, n;
+
+  /* Workspace with room for desired_colors boxes */
+  boxes = (boxptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * SIZEOF(box));
+
+  /* Seed the list with a single box spanning every histogram cell,
+   * then let update_box trim it to the cells actually in use.
+   */
+  root = boxes;
+  root->c0min = root->c1min = root->c2min = 0;
+  root->c0max = MAXJSAMPLE >> C0_SHIFT;
+  root->c1max = MAXJSAMPLE >> C1_SHIFT;
+  root->c2max = MAXJSAMPLE >> C2_SHIFT;
+  update_box(cinfo, root);
+
+  /* Split boxes until there are enough (or none can be split further) */
+  nboxes = median_cut(cinfo, boxes, 1, desired_colors);
+  /* One colormap entry per resulting box */
+  for (n = 0; n < nboxes; n++)
+    compute_color(cinfo, boxes + n, n);
+  cinfo->actual_number_of_colors = nboxes;
+  TRACEMS1(cinfo, 1, JTRC_QUANT_SELECTED, nboxes);
+}
+
+
+/*
+ * These routines are concerned with the time-critical task of mapping input
+ * colors to the nearest color in the selected colormap.
+ *
+ * We re-use the histogram space as an "inverse color map", essentially a
+ * cache for the results of nearest-color searches.  All colors within a
+ * histogram cell will be mapped to the same colormap entry, namely the one
+ * closest to the cell's center.  This may not be quite the closest entry to
+ * the actual input color, but it's almost as good.  A zero in the cache
+ * indicates we haven't found the nearest color for that cell yet; the array
+ * is cleared to zeroes before starting the mapping pass.  When we find the
+ * nearest color for a cell, its colormap index plus one is recorded in the
+ * cache for future use.  The pass2 scanning routines call fill_inverse_cmap
+ * when they need to use an unfilled entry in the cache.
+ *
+ * Our method of efficiently finding nearest colors is based on the "locally
+ * sorted search" idea described by Heckbert and on the incremental distance
+ * calculation described by Spencer W. Thomas in chapter III.1 of Graphics
+ * Gems II (James Arvo, ed.  Academic Press, 1991).  Thomas points out that
+ * the distances from a given colormap entry to each cell of the histogram can
+ * be computed quickly using an incremental method: the differences between
+ * distances to adjacent cells themselves differ by a constant.  This allows a
+ * fairly fast implementation of the "brute force" approach of computing the
+ * distance from every colormap entry to every histogram cell.  Unfortunately,
+ * it needs a work array to hold the best-distance-so-far for each histogram
+ * cell (because the inner loop has to be over cells, not colormap entries).
+ * The work array elements have to be INT32s, so the work array would need
+ * 256Kb at our recommended precision.  This is not feasible in DOS machines.
+ *
+ * To get around these problems, we apply Thomas' method to compute the
+ * nearest colors for only the cells within a small subbox of the histogram.
+ * The work array need be only as big as the subbox, so the memory usage
+ * problem is solved.  Furthermore, we need not fill subboxes that are never
+ * referenced in pass2; many images use only part of the color gamut, so a
+ * fair amount of work is saved.  An additional advantage of this
+ * approach is that we can apply Heckbert's locality criterion to quickly
+ * eliminate colormap entries that are far away from the subbox; typically
+ * three-fourths of the colormap entries are rejected by Heckbert's criterion,
+ * and we need not compute their distances to individual cells in the subbox.
+ * The speed of this approach is heavily influenced by the subbox size: too
+ * small means too much overhead, too big loses because Heckbert's criterion
+ * can't eliminate as many colormap entries.  Empirically the best subbox
+ * size seems to be about 1/512th of the histogram (1/8th in each direction).
+ *
+ * Thomas' article also describes a refined method which is asymptotically
+ * faster than the brute-force method, but it is also far more complex and
+ * cannot efficiently be applied to small subboxes.  It is therefore not
+ * useful for programs intended to be portable to DOS machines.  On machines
+ * with plenty of memory, filling the whole histogram in one shot with Thomas'
+ * refined method might be faster than the present code --- but then again,
+ * it might not be any faster, and it's certainly more complicated.
+ */
+
+
+/* log2(histogram cells in update box) for each axis; this can be adjusted */
+#define BOX_C0_LOG  (HIST_C0_BITS-3)
+#define BOX_C1_LOG  (HIST_C1_BITS-3)
+#define BOX_C2_LOG  (HIST_C2_BITS-3)
+
+#define BOX_C0_ELEMS  (1<<BOX_C0_LOG) /* # of hist cells per axis in update box */
+#define BOX_C1_ELEMS  (1<<BOX_C1_LOG)
+#define BOX_C2_ELEMS  (1<<BOX_C2_LOG)
+
+#define BOX_C0_SHIFT  (C0_SHIFT + BOX_C0_LOG) /* log2(box extent in samples) */
+#define BOX_C1_SHIFT  (C1_SHIFT + BOX_C1_LOG)
+#define BOX_C2_SHIFT  (C2_SHIFT + BOX_C2_LOG)
+
+
+/*
+ * The next three routines implement inverse colormap filling.  They could
+ * all be folded into one big routine, but splitting them up this way saves
+ * some stack space (the mindist[] and bestdist[] arrays need not coexist)
+ * and may allow some compilers to produce better code by registerizing more
+ * inner-loop variables.
+ */
+
+LOCAL(int)
+find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+		    JSAMPLE colorlist[])
+/* Locate the colormap entries close enough to an update box to be candidates
+ * for the nearest entry to some cell(s) in the update box.  minc0/minc1/minc2
+ * give the center coordinates of the box's first cell.  The return value is
+ * the number of candidate colormap entries; their colormap indexes are
+ * placed in colorlist[] in increasing order.
+ * This routine uses Heckbert's "locally sorted search" criterion to select
+ * the colors that need further consideration.
+ */
+{
+  int numcolors = cinfo->actual_number_of_colors;
+  int maxc0, maxc1, maxc2;	/* center of the box's upper-corner cell */
+  int centerc0, centerc1, centerc2;	/* midpoint of min and max, rounded down */
+  int i, x, ncolors;
+  INT32 minmaxdist, min_dist, max_dist, tdist;
+  INT32 mindist[MAXNUMCOLORS];	/* min distance to colormap entry i */
+
+  /* Compute true coordinates of update box's upper corner and center.
+   * Actually we compute the coordinates of the center of the upper-corner
+   * histogram cell, which are the upper bounds of the volume we care about.
+   * Note that since ">>" rounds down, the "center" values may be closer to
+   * min than to max; hence comparisons to them must be "<=", not "<".
+   */
+  maxc0 = minc0 + ((1 << BOX_C0_SHIFT) - (1 << C0_SHIFT));
+  centerc0 = (minc0 + maxc0) >> 1;
+  maxc1 = minc1 + ((1 << BOX_C1_SHIFT) - (1 << C1_SHIFT));
+  centerc1 = (minc1 + maxc1) >> 1;
+  maxc2 = minc2 + ((1 << BOX_C2_SHIFT) - (1 << C2_SHIFT));
+  centerc2 = (minc2 + maxc2) >> 1;
+
+  /* For each color in colormap, find:
+   *  1. its minimum squared-distance to any point in the update box
+   *     (zero if color is within update box);
+   *  2. its maximum squared-distance to any point in the update box.
+   * Both of these can be found by considering only the corners of the box.
+   * We save the minimum distance for each color in mindist[];
+   * only the smallest maximum distance is of interest.
+   */
+  minmaxdist = 0x7FFFFFFFL;	/* larger than any distance computed below */
+
+  for (i = 0; i < numcolors; i++) {
+    /* We compute the squared-c0-distance term, then add in the other two. */
+    x = GETJSAMPLE(cinfo->colormap[0][i]);
+    if (x < minc0) {	/* below the box: minc0 side is nearest, maxc0 side farthest */
+      tdist = (x - minc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - maxc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else if (x > maxc0) {	/* above the box: maxc0 side is nearest, minc0 side farthest */
+      tdist = (x - maxc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - minc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      min_dist = 0;
+      if (x <= centerc0) {	/* at or below the midpoint: maxc0 side is farthest */
+	tdist = (x - maxc0) * C0_SCALE;
+	max_dist = tdist*tdist;
+      } else {
+	tdist = (x - minc0) * C0_SCALE;
+	max_dist = tdist*tdist;
+      }
+    }
+
+    x = GETJSAMPLE(cinfo->colormap[1][i]);	/* same three cases for component 1, accumulated */
+    if (x < minc1) {
+      tdist = (x - minc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc1) {
+      tdist = (x - maxc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc1) {
+	tdist = (x - maxc1) * C1_SCALE;
+	max_dist += tdist*tdist;
+      } else {
+	tdist = (x - minc1) * C1_SCALE;
+	max_dist += tdist*tdist;
+      }
+    }
+
+    x = GETJSAMPLE(cinfo->colormap[2][i]);	/* same three cases for component 2, accumulated */
+    if (x < minc2) {
+      tdist = (x - minc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc2) {
+      tdist = (x - maxc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc2) {
+	tdist = (x - maxc2) * C2_SCALE;
+	max_dist += tdist*tdist;
+      } else {
+	tdist = (x - minc2) * C2_SCALE;
+	max_dist += tdist*tdist;
+      }
+    }
+
+    mindist[i] = min_dist;	/* save away the results */
+    if (max_dist < minmaxdist)	/* remember the smallest worst-case distance */
+      minmaxdist = max_dist;
+  }
+
+  /* Now we know that no cell in the update box is more than minmaxdist
+   * away from some colormap entry.  Therefore, only colors that are
+   * within minmaxdist of some part of the box need be considered.
+   */
+  ncolors = 0;
+  for (i = 0; i < numcolors; i++) {
+    if (mindist[i] <= minmaxdist)
+      colorlist[ncolors++] = (JSAMPLE) i;	/* entry i stays a candidate */
+  }
+  return ncolors;
+}
+
+
+LOCAL(void)
+find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+		  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
+/* Find the closest colormap entry for each cell in the update box whose first
+ * cell is centered at minc0/minc1/minc2, considering only the numcolors
+ * candidates in colorlist[] (as prepared by find_nearby_colors).  The winning
+ * colormap index for each cell is stored in bestcolor[], c2 varying fastest.
+ * Uses Thomas' incremental method to step the distance from cell to cell.
+ */
+{
+  int ic0, ic1, ic2;
+  int i, icolor;
+  register INT32 * bptr;	/* pointer into bestdist[] array */
+  JSAMPLE * cptr;		/* pointer into bestcolor[] array */
+  INT32 dist0, dist1;		/* initial distance values */
+  register INT32 dist2;		/* current distance in inner loop */
+  INT32 xx0, xx1;		/* distance increments */
+  register INT32 xx2;
+  INT32 inc0, inc1, inc2;	/* initial values for increments */
+  /* This array holds the distance to the nearest-so-far color for each cell */
+  INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Initialize best-distance for each cell of the update box */
+  bptr = bestdist;
+  for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--)
+    *bptr++ = 0x7FFFFFFFL;	/* so the first candidate tested always wins */
+  
+  /* For each color selected by find_nearby_colors,
+   * compute its distance to the center of each cell in the box.
+   * If that's less than best-so-far, update best distance and color number.
+   */
+  
+  /* Nominal steps between cell centers ("x" in Thomas article) */
+#define STEP_C0  ((1 << C0_SHIFT) * C0_SCALE)
+#define STEP_C1  ((1 << C1_SHIFT) * C1_SCALE)
+#define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
+  
+  for (i = 0; i < numcolors; i++) {
+    icolor = GETJSAMPLE(colorlist[i]);
+    /* Compute (square of) distance from minc0/c1/c2 to this color */
+    inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
+    dist0 = inc0*inc0;
+    inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
+    dist0 += inc1*inc1;
+    inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
+    dist0 += inc2*inc2;
+    /* Form the initial difference increments: (d+s)^2 - d^2 = 2*d*s + s^2 */
+    inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
+    inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1;
+    inc2 = inc2 * (2 * STEP_C2) + STEP_C2 * STEP_C2;
+    /* Now loop over all cells in box, updating distance per Thomas method */
+    bptr = bestdist;
+    cptr = bestcolor;
+    xx0 = inc0;
+    for (ic0 = BOX_C0_ELEMS-1; ic0 >= 0; ic0--) {
+      dist1 = dist0;
+      xx1 = inc1;
+      for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) {
+	dist2 = dist1;
+	xx2 = inc2;
+	for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
+	  if (dist2 < *bptr) {	/* strict test: on a tie the earlier candidate is kept */
+	    *bptr = dist2;
+	    *cptr = (JSAMPLE) icolor;
+	  }
+	  dist2 += xx2;
+	  xx2 += 2 * STEP_C2 * STEP_C2;	/* the increment itself grows by 2*s^2 per step */
+	  bptr++;
+	  cptr++;
+	}
+	dist1 += xx1;
+	xx1 += 2 * STEP_C1 * STEP_C1;
+      }
+      dist0 += xx0;
+      xx0 += 2 * STEP_C0 * STEP_C0;
+    }
+  }
+}
+
+
+LOCAL(void)
+fill_inverse_cmap (j_decompress_ptr cinfo, int c0, int c1, int c2)
+/* Populate the inverse-colormap cache for the whole update box that holds */
+/* histogram cell c0/c1/c2.  Only that cell is required to be filled; the */
+/* remaining cells of the box are filled at the same time. */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  int box0, box1, box2;		/* update box ID along each axis */
+  int base0, base1, base2;	/* histogram indexes of the box's first cell */
+  int minc0, minc1, minc2;	/* center of the box's first cell */
+  int i0, i1, i2;
+  int next = 0;			/* next entry of nearest[] to consume */
+  int ncandidates;		/* number of entries used in candidates[] */
+  /* Colormap indexes that might be nearest to some cell of the box. */
+  JSAMPLE candidates[MAXNUMCOLORS];
+  /* The closest colormap index for each cell of the box. */
+  JSAMPLE nearest[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Identify the update box containing the requested cell ... */
+  box0 = c0 >> BOX_C0_LOG;
+  box1 = c1 >> BOX_C1_LOG;
+  box2 = c2 >> BOX_C2_LOG;
+  /* ... and the histogram indexes of that box's first cell. */
+  base0 = box0 << BOX_C0_LOG;
+  base1 = box1 << BOX_C1_LOG;
+  base2 = box2 << BOX_C2_LOG;
+
+  /* True color coordinates of the center of the box's first cell; these
+   * are the lower bounds of the volume we care about.
+   */
+  minc0 = (box0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1);
+  minc1 = (box1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1);
+  minc2 = (box2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1);
+
+  /* Narrow the colormap down to the entries that could be nearest to some
+   * cell of this box. */
+  ncandidates = find_nearby_colors(cinfo, minc0, minc1, minc2, candidates);
+
+  /* Pick the nearest candidate for every cell. */
+  find_best_colors(cinfo, minc0, minc1, minc2, ncandidates, candidates,
+		   nearest);
+
+  /* Store each cell's color number, plus 1, in the main cache array. */
+  for (i0 = 0; i0 < BOX_C0_ELEMS; i0++) {
+    for (i1 = 0; i1 < BOX_C1_ELEMS; i1++) {
+      for (i2 = 0; i2 < BOX_C2_ELEMS; i2++) {
+	histogram[base0+i0][base1+i1][base2+i2] =
+	  (histcell) (GETJSAMPLE(nearest[next]) + 1);
+	next++;
+      }
+    }
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+pass2_no_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+/* Quantize num_rows rows of pixels to colormap indexes without dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  JDIMENSION width = cinfo->output_width;
+  JDIMENSION remaining;		/* pixels still to do in this row */
+  int row;
+
+  for (row = 0; row < num_rows; row++) {
+    register JSAMPROW src = input_buf[row];
+    register JSAMPROW dst = output_buf[row];
+
+    for (remaining = width; remaining != 0; remaining--) {
+      /* reduce the pixel's three components to histogram cell indexes */
+      register int c0 = GETJSAMPLE(src[0]) >> C0_SHIFT;
+      register int c1 = GETJSAMPLE(src[1]) >> C1_SHIFT;
+      register int c2 = GETJSAMPLE(src[2]) >> C2_SHIFT;
+      register histptr cachep = & histogram[c0][c1][c2];
+
+      /* A zero cache entry means this cell has not been mapped yet: */
+      /* find its nearest colormap entry, which also fills the cache */
+      if (*cachep == 0)
+	fill_inverse_cmap(cinfo, c0,c1,c2);
+      /* the emitted colormap index is the cached value minus 1 */
+      *dst++ = (JSAMPLE) (*cachep - 1);
+      src += 3;
+    }
+  }
+}
+
+
+METHODDEF(void)
+pass2_fs_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+/* Quantize num_rows rows with Floyd-Steinberg dithering, alternating scan direction per row */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  register LOCFSERROR cur0, cur1, cur2;	/* current error or pixel value */
+  LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
+  LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
+  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  JSAMPROW inptr;		/* => current input pixel */
+  JSAMPROW outptr;		/* => current output pixel */
+  histptr cachep;
+  int dir;			/* +1 or -1 depending on direction */
+  int dir3;			/* 3*dir, for advancing inptr & errorptr */
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  int *error_limit = cquantize->error_limiter;
+  JSAMPROW colormap0 = cinfo->colormap[0];
+  JSAMPROW colormap1 = cinfo->colormap[1];
+  JSAMPROW colormap2 = cinfo->colormap[2];
+  SHIFT_TEMPS
+
+  for (row = 0; row < num_rows; row++) {
+    inptr = input_buf[row];
+    outptr = output_buf[row];
+    if (cquantize->on_odd_row) {
+      /* work right to left in this row */
+      inptr += (width-1) * 3;	/* so point to rightmost pixel */
+      outptr += width-1;
+      dir = -1;
+      dir3 = -3;
+      errorptr = cquantize->fserrors + (width+1)*3; /* => entry after last column */
+      cquantize->on_odd_row = FALSE; /* flip for next time */
+    } else {
+      /* work left to right in this row */
+      dir = 1;
+      dir3 = 3;
+      errorptr = cquantize->fserrors; /* => entry before first real column */
+      cquantize->on_odd_row = TRUE; /* flip for next time */
+    }
+    /* Preset error values: no error propagated to first pixel from left */
+    cur0 = cur1 = cur2 = 0;
+    /* and no error propagated to row below yet */
+    belowerr0 = belowerr1 = belowerr2 = 0;
+    bpreverr0 = bpreverr1 = bpreverr2 = 0;
+
+    for (col = width; col > 0; col--) {
+      /* curN holds the error propagated from the previous pixel on the
+       * current line.  Add the error propagated from the previous line
+       * to form the complete error correction term for this pixel, and
+       * round the error term (which is expressed * 16) to an integer.
+       * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+       * for either sign of the error value.
+       * Note: errorptr points to *previous* column's array entry.
+       */
+      cur0 = RIGHT_SHIFT(cur0 + errorptr[dir3+0] + 8, 4);
+      cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3+1] + 8, 4);
+      cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3+2] + 8, 4);
+      /* Limit the error using transfer function set by init_error_limit.
+       * See comments with init_error_limit for rationale.
+       */
+      cur0 = error_limit[cur0];
+      cur1 = error_limit[cur1];
+      cur2 = error_limit[cur2];
+      /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+       * The maximum error is +- MAXJSAMPLE (or less with error limiting);
+       * this sets the required size of the range_limit array.
+       */
+      cur0 += GETJSAMPLE(inptr[0]);
+      cur1 += GETJSAMPLE(inptr[1]);
+      cur2 += GETJSAMPLE(inptr[2]);
+      cur0 = GETJSAMPLE(range_limit[cur0]);
+      cur1 = GETJSAMPLE(range_limit[cur1]);
+      cur2 = GETJSAMPLE(range_limit[cur2]);
+      /* Index into the cache with adjusted pixel value */
+      cachep = & histogram[cur0>>C0_SHIFT][cur1>>C1_SHIFT][cur2>>C2_SHIFT];
+      /* If we have not seen this color before, find nearest colormap */
+      /* entry and update the cache */
+      if (*cachep == 0)
+	fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
+      /* Now emit the colormap index for this cell (cached value minus 1) */
+      { register int pixcode = *cachep - 1;
+	*outptr = (JSAMPLE) pixcode;
+	/* Compute representation error for this pixel */
+	cur0 -= GETJSAMPLE(colormap0[pixcode]);
+	cur1 -= GETJSAMPLE(colormap1[pixcode]);
+	cur2 -= GETJSAMPLE(colormap2[pixcode]);
+      }
+      /* Compute error fractions to be propagated to adjacent pixels.
+       * Add these into the running sums, and simultaneously shift the
+       * next-line error sums left by 1 column.
+       */
+      { register LOCFSERROR bnexterr, delta;
+
+	bnexterr = cur0;	/* Process component 0 */
+	delta = cur0 * 2;
+	cur0 += delta;		/* form error * 3 */
+	errorptr[0] = (FSERROR) (bpreverr0 + cur0);	/* finished sum for the previous column */
+	cur0 += delta;		/* form error * 5 */
+	bpreverr0 = belowerr0 + cur0;
+	belowerr0 = bnexterr;	/* error * 1, carried to the next column */
+	cur0 += delta;		/* form error * 7 */
+	bnexterr = cur1;	/* Process component 1 */
+	delta = cur1 * 2;
+	cur1 += delta;		/* form error * 3 */
+	errorptr[1] = (FSERROR) (bpreverr1 + cur1);
+	cur1 += delta;		/* form error * 5 */
+	bpreverr1 = belowerr1 + cur1;
+	belowerr1 = bnexterr;
+	cur1 += delta;		/* form error * 7 */
+	bnexterr = cur2;	/* Process component 2 */
+	delta = cur2 * 2;
+	cur2 += delta;		/* form error * 3 */
+	errorptr[2] = (FSERROR) (bpreverr2 + cur2);
+	cur2 += delta;		/* form error * 5 */
+	bpreverr2 = belowerr2 + cur2;
+	belowerr2 = bnexterr;
+	cur2 += delta;		/* form error * 7 */
+      }
+      /* At this point curN contains the 7/16 error value to be propagated
+       * to the next pixel on the current line, and all the errors for the
+       * next line have been shifted over.  We are therefore ready to move on.
+       */
+      inptr += dir3;		/* Advance pixel pointers to next column */
+      outptr += dir;
+      errorptr += dir3;		/* advance errorptr to current column */
+    }
+    /* Post-loop cleanup: we must unload the final error values into the
+     * final fserrors[] entry.  Note we need not unload belowerrN because
+     * it is for the dummy column before or after the actual array.
+     */
+    errorptr[0] = (FSERROR) bpreverr0; /* unload prev errs into array */
+    errorptr[1] = (FSERROR) bpreverr1;
+    errorptr[2] = (FSERROR) bpreverr2;
+  }
+}
+
+
+/*
+ * Initialize the error-limiting transfer function (lookup table).
+ * The raw F-S error computation can potentially compute error values of up to
+ * +- MAXJSAMPLE.  But we want the maximum correction applied to a pixel to be
+ * much less, otherwise obviously wrong pixels will be created.  (Typical
+ * effects include weird fringes at color-area boundaries, isolated bright
+ * pixels in a dark area, etc.)  The standard advice for avoiding this problem
+ * is to ensure that the "corners" of the color cube are allocated as output
+ * colors; then repeated errors in the same direction cannot cause cascading
+ * error buildup.  However, that only prevents the error from getting
+ * completely out of hand; Aaron Giles reports that error limiting improves
+ * the results even with corner colors allocated.
+ * A simple clamping of the error values to about +- MAXJSAMPLE/8 works pretty
+ * well, but the smoother transfer function used below is even better.  Thanks
+ * to Aaron Giles for this idea.
+ */
+
+LOCAL(void)
+init_error_limit (j_decompress_ptr cinfo)
+/* Allocate the error_limiter table (2*MAXJSAMPLE+1 ints), fill it, and store it in the quantizer state */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  int * table;
+  int in, out;			/* error magnitude and its limited value */
+
+  table = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int));
+  table += MAXJSAMPLE;		/* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
+  cquantize->error_limiter = table;
+
+#define STEPSIZE ((MAXJSAMPLE+1)/16)
+  /* Map errors 1:1 up to +- MAXJSAMPLE/16; table[-in] always mirrors table[in] with the sign flipped */
+  out = 0;
+  for (in = 0; in < STEPSIZE; in++, out++) {
+    table[in] = out; table[-in] = -out;
+  }
+  /* Map errors 1:2 up to +- 3*MAXJSAMPLE/16; in++ runs before the parity test, so out steps only when the new in is even */
+  for (; in < STEPSIZE*3; in++, out += (in&1) ? 0 : 1) {
+    table[in] = out; table[-in] = -out;
+  }
+  /* Clamp the rest to final out value (which is (MAXJSAMPLE+1)/8) */
+  for (; in <= MAXJSAMPLE; in++) {
+    table[in] = out; table[-in] = -out;
+  }
+#undef STEPSIZE
+}
+
+
+/*
+ * Finish up at the end of each pass.
+ */
+
+METHODDEF(void)
+finish_pass1 (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr quant = (my_cquantize_ptr) cinfo->cquantize;
+
+  /* Install the saved colormap array, then choose the colors into it */
+  cinfo->colormap = quant->sv_colormap;
+  select_colors(cinfo, quant->desired);
+  /* Make the next pass start by zeroing the color index table */
+  quant->needs_zeroed = TRUE;
+}
+
+
+METHODDEF(void)
+finish_pass2 (j_decompress_ptr cinfo)
+{
+  /* no work: intentionally empty, cinfo is unused */
+}
+
+
+/*
+ * Initialize for each processing pass.
+ */
+
+METHODDEF(void)
+start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  int i;			/* color count first, later the histogram slice index */
+
+  /* Set up either the prescan pass (is_pre_scan) or the mapping pass. */
+  /* Only F-S or no dithering is supported; ordered dither becomes F-S. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  if (is_pre_scan) {
+    /* Set up method pointers */
+    cquantize->pub.color_quantize = prescan_quantize;
+    cquantize->pub.finish_pass = finish_pass1;
+    cquantize->needs_zeroed = TRUE; /* Always zero histogram */
+  } else {
+    /* Set up method pointers */
+    if (cinfo->dither_mode == JDITHER_FS)
+      cquantize->pub.color_quantize = pass2_fs_dither;
+    else
+      cquantize->pub.color_quantize = pass2_no_dither;
+    cquantize->pub.finish_pass = finish_pass2;
+
+    /* Make sure color count is acceptable */
+    i = cinfo->actual_number_of_colors;
+    if (i < 1)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 1);
+    if (i > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+
+    if (cinfo->dither_mode == JDITHER_FS) {
+      size_t arraysize = (size_t) ((cinfo->output_width + 2) *
+				   (3 * SIZEOF(FSERROR)));	/* width+2 columns of 3 error terms */
+      /* Allocate Floyd-Steinberg workspace if we didn't already. */
+      if (cquantize->fserrors == NULL)
+	cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
+      /* Initialize the propagated errors to zero. */
+      FMEMZERO((void FAR *) cquantize->fserrors, arraysize);
+      /* Make the error-limit table if we didn't already. */
+      if (cquantize->error_limiter == NULL)
+	init_error_limit(cinfo);
+      cquantize->on_odd_row = FALSE;	/* pass2_fs_dither then scans its first row left to right */
+    }
+
+  }
+  /* Zero the histogram or inverse color map, if necessary */
+  if (cquantize->needs_zeroed) {
+    for (i = 0; i < HIST_C0_ELEMS; i++) {
+      FMEMZERO((void FAR *) histogram[i],
+	       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+    }
+    cquantize->needs_zeroed = FALSE;
+  }
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+METHODDEF(void)
+new_color_map_2_quant (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr quant = (my_cquantize_ptr) cinfo->cquantize;
+
+  /* Request that the inverse color map be zeroed again */
+  quant->needs_zeroed = TRUE;
+}
+
+
+/*
+ * Module initialization routine for 2-pass color quantization.
+ */
+
+GLOBAL(void)
+jinit_2pass_quantizer (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize;
+  int i;			/* histogram slice index */
+
+  cquantize = (my_cquantize_ptr)	/* private quantizer state, from the JPOOL_IMAGE pool */
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_cquantizer));
+  cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
+  cquantize->pub.start_pass = start_pass_2_quant;
+  cquantize->pub.new_color_map = new_color_map_2_quant;
+  cquantize->fserrors = NULL;	/* flag optional arrays not allocated */
+  cquantize->error_limiter = NULL;
+
+  /* Make sure jdmaster didn't give me a case I can't handle */
+  if (cinfo->out_color_components != 3)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  /* Allocate the histogram/inverse colormap storage: a small array of slice pointers, then one large slice per c0 value */
+  cquantize->histogram = (hist3d) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * SIZEOF(hist2d));
+  for (i = 0; i < HIST_C0_ELEMS; i++) {
+    cquantize->histogram[i] = (hist2d) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+  }
+  cquantize->needs_zeroed = TRUE; /* histogram is garbage now */
+
+  /* Allocate storage for the completed colormap, if required.
+   * We do this now since it is FAR storage and may affect
+   * the memory manager's space calculations.
+   */
+  if (cinfo->enable_2pass_quant) {
+    /* Make sure color count is acceptable */
+    int desired = cinfo->desired_number_of_colors;
+    /* Lower bound on # of colors ... somewhat arbitrary as long as > 0 */
+    if (desired < 8)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 8);
+    /* Make sure colormap indexes can be represented by JSAMPLEs */
+    if (desired > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+    cquantize->sv_colormap = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo,JPOOL_IMAGE, (JDIMENSION) desired, (JDIMENSION) 3);
+    cquantize->desired = desired;	/* finish_pass1 passes this to select_colors */
+  } else
+    cquantize->sv_colormap = NULL;
+
+  /* Only F-S dithering or no dithering is supported. */
+  /* If user asks for ordered dither, give him F-S. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  /* Allocate Floyd-Steinberg workspace if necessary.
+   * This isn't really needed until pass 2, but again it is FAR storage.
+   * Although we will cope with a later change in dither_mode,
+   * we do not promise to honor max_memory_to_use if dither_mode changes.
+   */
+  if (cinfo->dither_mode == JDITHER_FS) {
+    cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       (size_t) ((cinfo->output_width + 2) * (3 * SIZEOF(FSERROR))));	/* same size start_pass_2_quant zeroes */
+    /* Might as well create the error-limiting table too. */
+    init_error_limit(cinfo);
+  }
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
diff --git a/libraries/jpeg/jutils.c b/libraries/jpeg/jutils.c
new file mode 100644
index 000000000..5b16b6d03
--- /dev/null
+++ b/libraries/jpeg/jutils.c
@@ -0,0 +1,227 @@
+/*
+ * jutils.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2009-2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains tables and miscellaneous utility routines needed
+ * for both compression and decompression.
+ * Note we prefix all global names with "j" to minimize conflicts with
+ * a surrounding application.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * jpeg_zigzag_order[i] is the zigzag-order position of the i'th element
+ * of a DCT block read in natural order (left to right, top to bottom).
+ */
+
+#if 0				/* This table is not actually needed in v6a */
+
+const int jpeg_zigzag_order[DCTSIZE2] = {	/* compiled out by the enclosing #if 0 */
+   0,  1,  5,  6, 14, 15, 27, 28,
+   2,  4,  7, 13, 16, 26, 29, 42,
+   3,  8, 12, 17, 25, 30, 41, 43,
+   9, 11, 18, 24, 31, 40, 44, 53,
+  10, 19, 23, 32, 39, 45, 52, 54,
+  20, 22, 33, 38, 46, 51, 55, 60,
+  21, 34, 37, 47, 50, 56, 59, 61,
+  35, 36, 48, 49, 57, 58, 62, 63
+};
+
+#endif
+
+/*
+ * jpeg_natural_order[i] is the natural-order position of the i'th element
+ * of zigzag order.
+ *
+ * When reading corrupted data, the Huffman decoders could attempt
+ * to reference an entry beyond the end of this array (if the decoded
+ * zero run length reaches past the end of the block).  To prevent
+ * wild stores without adding an inner-loop test, we put some extra
+ * "63"s after the real entries.  This will cause the extra coefficient
+ * to be stored in location 63 of the block, not somewhere random.
+ * The worst case would be a run-length of 15, which means we need 16
+ * fake entries.
+ */
+
+const int jpeg_natural_order[DCTSIZE2+16] = {	/* 64 zigzag entries, then 16 padding entries */
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 32, 25, 18, 11,  4,  5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13,  6,  7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order7[7*7+16] = {	/* zigzag order over rows/columns 0..6; values still use a row stride of 8 */
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 32, 25, 18, 11,  4,  5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13,  6, 14, 21, 28, 35,
+ 42, 49, 50, 43, 36, 29, 22, 30,
+ 37, 44, 51, 52, 45, 38, 46, 53,
+ 54,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order6[6*6+16] = {	/* zigzag order over rows/columns 0..5; values still use a row stride of 8 */
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 32, 25, 18, 11,  4,  5,
+ 12, 19, 26, 33, 40, 41, 34, 27,
+ 20, 13, 21, 28, 35, 42, 43, 36,
+ 29, 37, 44, 45,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order5[5*5+16] = {	/* zigzag order over rows/columns 0..4; values still use a row stride of 8 */
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 32, 25, 18, 11,  4, 12,
+ 19, 26, 33, 34, 27, 20, 28, 35,
+ 36,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order4[4*4+16] = {	/* zigzag order over rows/columns 0..3; values still use a row stride of 8 */
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 25, 18, 11, 19, 26, 27,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order3[3*3+16] = {	/* zigzag order over rows/columns 0..2; values still use a row stride of 8 */
+  0,  1,  8, 16,  9,  2, 10, 17,
+ 18,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order2[2*2+16] = {	/* zigzag order over rows/columns 0..1; values still use a row stride of 8 */
+  0,  1,  8,  9,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+
+/*
+ * Arithmetic utilities
+ */
+
+GLOBAL(long)
+jdiv_round_up (long a, long b)
+/* Return ceil(a/b), ie, a/b rounded up; assumes a >= 0 and b > 0 */
+{
+  long biased = a + b - 1L;	/* any nonzero remainder now reaches the next quotient */
+  return biased / b;
+}
+
+
+GLOBAL(long)
+jround_up (long a, long b)
+/* Return the smallest multiple of b that is >= a, ie, ceil(a/b)*b */
+/* Caller must supply a >= 0 and b > 0 */
+{
+  long bumped = a + (b - 1L);	/* reaches or passes the wanted multiple */
+  return bumped - (bumped % b);	/* drop the excess above that multiple */
+}
+
+
+/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
+ * and coefficient-block arrays.  This won't work on 80x86 because the arrays
+ * are FAR and we're assuming a small-pointer memory model.  However, some
+ * DOS compilers provide far-pointer versions of memcpy() and memset() even
+ * in the small-model libraries.  These will be used if USE_FMEM is defined.
+ * Otherwise, the routines below do it the hard way.  (The performance cost
+ * is not all that great, because these routines aren't very heavily used.)
+ */
+
+#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macro */
+#define FMEMCOPY(dest,src,size)	MEMCOPY(dest,src,size)
+#else				/* 80x86 case, define if we can */
+#ifdef USE_FMEM
+#define FMEMCOPY(dest,src,size)	_fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size))
+#else
+/* This function is for use by the FMEMZERO macro defined in jpegint.h.
+ * Do not call this function directly, use the FMEMZERO macro instead.
+ */
+GLOBAL(void)
+jzero_far (void FAR * target, size_t bytestozero)
+/* Clear bytestozero bytes of FAR memory starting at target; the memory */
+/* may be sample-array data, block-array data, or alloc_large data. */
+{
+  register char FAR * dst = (char FAR *) target;
+  register size_t remaining = bytestozero;
+  while (remaining > 0) {
+    *dst++ = 0;
+    remaining--;
+  }
+}
+#endif
+#endif
+
+
+GLOBAL(void)
+jcopy_sample_rows (JSAMPARRAY input_array, int source_row,
+		   JSAMPARRAY output_array, int dest_row,
+		   int num_rows, JDIMENSION num_cols)
+/* Copy num_rows rows of samples, num_cols samples per row.
+ * Row source_row+k of input_array goes to row dest_row+k of output_array,
+ * for k = 0 .. num_rows-1 in that order; the areas may overlap for
+ * duplication.  Every row touched must hold at least num_cols samples.
+ */
+{
+  register JSAMPROW src, dst;
+#ifdef FMEMCOPY
+  register size_t nbytes = (size_t) (num_cols * SIZEOF(JSAMPLE));
+#else
+  register JDIMENSION left;
+#endif
+  register int k;
+
+  for (k = 0; k < num_rows; k++) {
+    src = input_array[source_row + k];
+    dst = output_array[dest_row + k];
+#ifdef FMEMCOPY
+    /* one bulk copy per row */
+    FMEMCOPY(dst, src, nbytes);
+#else
+    /* sample-by-sample copy; GETJSAMPLE() is not needed for this */
+    for (left = num_cols; left > 0; left--) {
+      *dst++ = *src++;
+    }
+#endif
+  }
+}
+
+
+GLOBAL(void)
+jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row,
+		 JDIMENSION num_blocks)
+/* Copy num_blocks coefficient blocks from input_row to output_row. */
+{
+#ifdef FMEMCOPY
+  FMEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * SIZEOF(JCOEF)));
+#else
+  /* No bulk copy available: move the coefficients one at a time. */
+  register JCOEFPTR src = (JCOEFPTR) input_row;
+  register JCOEFPTR dst = (JCOEFPTR) output_row;
+  register long remaining = (long) num_blocks * DCTSIZE2;
+  while (remaining > 0) {
+    *dst++ = *src++;
+    remaining--;
+  }
+#endif
+}
diff --git a/libraries/jpeg/jversion.h b/libraries/jpeg/jversion.h
new file mode 100644
index 000000000..d096384f7
--- /dev/null
+++ b/libraries/jpeg/jversion.h
@@ -0,0 +1,14 @@
+/*
+ * jversion.h
+ *
+ * Copyright (C) 1991-2018, Thomas G. Lane, Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains software version identification.
+ */
+
+
+#define JVERSION	"9c  14-Jan-2018"	/* version identification string; NOTE(review): presumably release name then release date -- confirm */
+
+#define JCOPYRIGHT	"Copyright (C) 2018, Thomas G. Lane, Guido Vollbeding"	/* copyright notice string */
diff --git a/libraries/lzma/C/7z.h b/libraries/lzma/C/7z.h
new file mode 100644
index 000000000..6c7886e38
--- /dev/null
+++ b/libraries/lzma/C/7z.h
@@ -0,0 +1,202 @@
+/* 7z.h -- 7z interface
+2017-04-03 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_H
+#define __7Z_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define k7zStartHeaderSize 0x20
+#define k7zSignatureSize 6
+
+extern const Byte k7zSignature[k7zSignatureSize];
+
+typedef struct
+{
+  const Byte *Data;  /* next unread byte; the SZ_READ_* and SKIP_DATA macros in 7zArcIn.c advance it */
+  size_t Size;       /* bytes still available at Data; decreased in step with every advance of Data */
+} CSzData;
+
+/* CSzCoderInfo & CSzFolder support only default methods */
+
+typedef struct
+{
+  size_t PropsOffset;  /* offset of the coder properties from the start of the folder record; 0 if none */
+  UInt32 MethodID;     /* method id bytes combined most significant byte first; ids above 32 bits are rejected */
+  Byte NumStreams;     /* input stream count of the coder; 1 unless the coder record states another value */
+  Byte PropsSize;      /* size of the properties in bytes; SzGetNextFolderItem only accepts values below 0x80 */
+} CSzCoderInfo;
+
+typedef struct
+{
+  UInt32 InIndex;   /* coder input stream index; validated to be below the folder's total input stream count */
+  UInt32 OutIndex;  /* coder index; validated to be below the folder's NumCoders */
+} CSzBond;
+
+#define SZ_NUM_CODERS_IN_FOLDER_MAX 4
+#define SZ_NUM_BONDS_IN_FOLDER_MAX 3
+#define SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX 4
+
+typedef struct
+{
+  UInt32 NumCoders;       /* 1..SZ_NUM_CODERS_IN_FOLDER_MAX after a successful SzGetNextFolderItem */
+  UInt32 NumBonds;        /* always NumCoders - 1 */
+  UInt32 NumPackStreams;  /* total coder input streams minus NumBonds */
+  UInt32 UnpackStream;    /* first coder index that no bond names as OutIndex (0 when there are no bonds) */
+  UInt32 PackStreams[SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX];  /* input stream indices not consumed by a bond */
+  CSzBond Bonds[SZ_NUM_BONDS_IN_FOLDER_MAX];              /* first NumBonds entries are valid */
+  CSzCoderInfo Coders[SZ_NUM_CODERS_IN_FOLDER_MAX];       /* first NumCoders entries are valid */
+} CSzFolder;
+
+
+SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd);
+
+typedef struct
+{
+  UInt32 Low;   /* first 32-bit word of the 8-byte stored value (read with GetUi32 in ReadTime) */
+  UInt32 High;  /* second 32-bit word, read from the following 4 bytes */
+} CNtfsFileTime;
+
+typedef struct
+{
+  Byte *Defs; /* "value is defined" bitmap, MSB 0 bit numbering; may be NULL (SzBitWithVals_Check tests for that) */
+  UInt32 *Vals; /* one entry per item; ReadUi32s stores 0 for items whose Defs bit is clear */
+} CSzBitUi32s;
+
+typedef struct
+{
+  Byte *Defs; /* "value is defined" bitmap, MSB 0 bit numbering; may be NULL (SzBitWithVals_Check tests for that) */
+  // UInt64 *Vals;
+  CNtfsFileTime *Vals; /* one entry per item, kept as two 32-bit halves; ReadTime zeroes undefined entries */
+} CSzBitUi64s;
+
+#define SzBitArray_Check(p, i) (((p)[(i) >> 3] & (0x80 >> ((i) & 7))) != 0)
+
+#define SzBitWithVals_Check(p, i) ((p)->Defs && ((p)->Defs[(i) >> 3] & (0x80 >> ((i) & 7))) != 0)
+
+typedef struct
+{
+  UInt32 NumPackStreams;
+  UInt32 NumFolders;
+
+  UInt64 *PackPositions;          // NumPackStreams + 1 entries: running sum of pack sizes, last entry is the total
+  CSzBitUi32s FolderCRCs;         // NumFolders
+
+  size_t *FoCodersOffsets;        // NumFolders + 1: byte offset of each folder record inside CodersData
+  UInt32 *FoStartPackStreamIndex; // NumFolders + 1: index of the first pack stream of each folder
+  UInt32 *FoToCoderUnpackSizes;   // NumFolders + 1: index of each folder's first entry in CoderUnpackSizes
+  Byte *FoToMainUnpackSizeIndex;  // NumFolders: per folder, first coder index no bond names as OutIndex
+  UInt64 *CoderUnpackSizes;       // for all coders in all folders
+
+  Byte *CodersData;               // copy of the raw folder records scanned by ReadUnpackInfo (NULL if empty)
+} CSzAr;
+
+UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);
+
+SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
+    ILookInStream *stream, UInt64 startPos,
+    Byte *outBuffer, size_t outSize,
+    ISzAllocPtr allocMain);
+
+typedef struct
+{
+  CSzAr db;  /* pack stream and folder tables, see CSzAr */
+
+  UInt64 startPosAfterHeader;  /* base offset handed to SzReadAndDecodePackedStreams for additional streams */
+  UInt64 dataPos;
+  
+  UInt32 NumFiles;
+
+  UInt64 *UnpackPositions;  // NumFiles + 1; SzArEx_GetFileSize(p, i) is entry i + 1 minus entry i
+  // Byte *IsEmptyFiles;
+  Byte *IsDirs;  /* bitmap tested by SzArEx_IsDir, MSB 0 bit numbering */
+  CSzBitUi32s CRCs;
+
+  CSzBitUi32s Attribs;
+  // CSzBitUi32s Parents;
+  CSzBitUi64s MTime;
+  CSzBitUi64s CTime;
+
+  UInt32 *FolderToFile;   // NumFolders + 1
+  UInt32 *FileToFolder;   // NumFiles
+
+  size_t *FileNameOffsets; /* in 2-byte steps */
+  Byte *FileNames;  /* UTF-16-LE */
+} CSzArEx;
+
+#define SzArEx_IsDir(p, i) (SzBitArray_Check((p)->IsDirs, i))
+
+#define SzArEx_GetFileSize(p, i) ((p)->UnpackPositions[(i) + 1] - (p)->UnpackPositions[i])
+
+void SzArEx_Init(CSzArEx *p);
+void SzArEx_Free(CSzArEx *p, ISzAllocPtr alloc);
+UInt64 SzArEx_GetFolderStreamPos(const CSzArEx *p, UInt32 folderIndex, UInt32 indexInFolder);
+int SzArEx_GetFolderFullPackSize(const CSzArEx *p, UInt32 folderIndex, UInt64 *resSize);
+
+/*
+if dest == NULL, the return value specifies the required size of the buffer,
+  in 16-bit characters, including the null-terminating character.
+if dest != NULL, the return value specifies the number of 16-bit characters that
+  are written to the dest, including the null-terminating character. */
+
+size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest);
+
+/*
+size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex);
+UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest);
+*/
+
+
+
+/*
+  SzArEx_Extract extracts file from archive
+
+  *outBuffer must be 0 before first call for each new archive.
+
+  Extracting cache:
+    If you need to decompress more than one file, you can send
+    these values from previous call:
+      *blockIndex,
+      *outBuffer,
+      *outBufferSize
+    You can consider "*outBuffer" as cache of solid block. If your archive is solid,
+    it will increase decompression speed.
+  
+    If you use external function, you can declare these 3 cache variables
+    (blockIndex, outBuffer, outBufferSize) as static in that external function.
+    
+    Free *outBuffer and set *outBuffer to 0, if you want to flush cache.
+*/
+
+SRes SzArEx_Extract(
+    const CSzArEx *db,
+    ILookInStream *inStream,
+    UInt32 fileIndex,         /* index of file */
+    UInt32 *blockIndex,       /* index of solid block */
+    Byte **outBuffer,         /* pointer to pointer to output buffer (allocated with allocMain) */
+    size_t *outBufferSize,    /* buffer size for output buffer */
+    size_t *offset,           /* offset of stream for required file in *outBuffer */
+    size_t *outSizeProcessed, /* size of file in *outBuffer */
+    ISzAllocPtr allocMain,
+    ISzAllocPtr allocTemp);
+
+
+/*
+SzArEx_Open Errors:
+SZ_ERROR_NO_ARCHIVE
+SZ_ERROR_ARCHIVE
+SZ_ERROR_UNSUPPORTED
+SZ_ERROR_MEM
+SZ_ERROR_CRC
+SZ_ERROR_INPUT_EOF
+SZ_ERROR_FAIL
+*/
+
+SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream,
+    ISzAllocPtr allocMain, ISzAllocPtr allocTemp);
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/C/7zArcIn.c b/libraries/lzma/C/7zArcIn.c
new file mode 100644
index 000000000..f74d0fad5
--- /dev/null
+++ b/libraries/lzma/C/7zArcIn.c
@@ -0,0 +1,1771 @@
+/* 7zArcIn.c -- 7z Input functions
+2018-12-31 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+#include "7z.h"
+#include "7zBuf.h"
+#include "7zCrc.h"
+#include "CpuArch.h"
+
+#define MY_ALLOC(T, p, size, alloc) { \
+  if ((p = (T *)ISzAlloc_Alloc(alloc, (size) * sizeof(T))) == NULL) return SZ_ERROR_MEM; }
+
+#define MY_ALLOC_ZE(T, p, size, alloc) { if ((size) == 0) p = NULL; else MY_ALLOC(T, p, size, alloc) }
+
+#define MY_ALLOC_AND_CPY(to, size, from, alloc) \
+  { MY_ALLOC(Byte, to, size, alloc); memcpy(to, from, size); }
+
+#define MY_ALLOC_ZE_AND_CPY(to, size, from, alloc) \
+  { if ((size) == 0) to = NULL; else { MY_ALLOC_AND_CPY(to, size, from, alloc) } }
+
+#define k7zMajorVersion 0
+
+enum EIdEnum
+{
+  k7zIdEnd,
+  k7zIdHeader,
+  k7zIdArchiveProperties,
+  k7zIdAdditionalStreamsInfo,
+  k7zIdMainStreamsInfo,
+  k7zIdFilesInfo,
+  k7zIdPackInfo,
+  k7zIdUnpackInfo,
+  k7zIdSubStreamsInfo,
+  k7zIdSize,
+  k7zIdCRC,
+  k7zIdFolder,
+  k7zIdCodersUnpackSize,
+  k7zIdNumUnpackStream,
+  k7zIdEmptyStream,
+  k7zIdEmptyFile,
+  k7zIdAnti,
+  k7zIdName,
+  k7zIdCTime,
+  k7zIdATime,
+  k7zIdMTime,
+  k7zIdWinAttrib,
+  k7zIdComment,
+  k7zIdEncodedHeader,
+  k7zIdStartPos,
+  k7zIdDummy
+  // k7zNtSecure,
+  // k7zParent,
+  // k7zIsReal
+};
+
+const Byte k7zSignature[k7zSignatureSize] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
+
+#define SzBitUi32s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; }
+
+static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc)
+{
+  /* Size *p for num items: a bitmap of (num + 7) / 8 bytes plus num values.
+     num == 0 leaves both pointers NULL; if an allocation fails, MY_ALLOC
+     returns SZ_ERROR_MEM from this function. */
+  if (num != 0)
+  {
+    MY_ALLOC(Byte, p->Defs, (num + 7) >> 3, alloc);
+    MY_ALLOC(UInt32, p->Vals, num, alloc);
+    return SZ_OK;
+  }
+  SzBitUi32s_Init(p);
+  return SZ_OK;
+}
+/* Free both arrays of *p through alloc and reset each pointer to NULL right after it is released. */
+void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
+{
+  ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL;
+  ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL;
+}
+
+#define SzBitUi64s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; }
+/* Free both arrays of *p through alloc and reset each pointer to NULL right after it is released. */
+void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc)
+{
+  ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL;
+  ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL;
+}
+
+/* Reset *p to the empty state: zero counts and no buffers referenced. */
+static void SzAr_Init(CSzAr *p)
+{
+  p->NumFolders = 0;
+  p->NumPackStreams = 0;
+
+  /* archive-wide buffers */
+  p->CodersData = NULL;
+  p->CoderUnpackSizes = NULL;
+  p->PackPositions = NULL;
+  SzBitUi32s_Init(&p->FolderCRCs);
+  /* per-folder lookup tables */
+  p->FoToMainUnpackSizeIndex = NULL;
+  p->FoToCoderUnpackSizes = NULL;
+  p->FoStartPackStreamIndex = NULL;
+  p->FoCodersOffsets = NULL;
+}
+/* Release every buffer referenced by *p through alloc, then put *p back into the empty state. */
+static void SzAr_Free(CSzAr *p, ISzAllocPtr alloc)
+{
+  ISzAlloc_Free(alloc, p->PackPositions);
+  SzBitUi32s_Free(&p->FolderCRCs, alloc);
+  /* per-folder tables and the per-coder size list */
+  ISzAlloc_Free(alloc, p->FoCodersOffsets);
+  ISzAlloc_Free(alloc, p->FoStartPackStreamIndex);
+  ISzAlloc_Free(alloc, p->FoToCoderUnpackSizes);
+  ISzAlloc_Free(alloc, p->FoToMainUnpackSizeIndex);
+  ISzAlloc_Free(alloc, p->CoderUnpackSizes);
+  /* copy of the raw folder records */
+  ISzAlloc_Free(alloc, p->CodersData);
+
+  SzAr_Init(p);  /* clears the counts and the now-dangling pointers */
+}
+
+/* Put *p, including the embedded CSzAr, into the empty state. */
+void SzArEx_Init(CSzArEx *p)
+{
+  SzAr_Init(&p->db);
+
+  p->dataPos = 0;
+  p->NumFiles = 0;
+
+  /* per-file and per-folder tables */
+  p->UnpackPositions = NULL;
+  p->IsDirs = NULL;
+  p->FileToFolder = NULL;
+  p->FolderToFile = NULL;
+  /* file name storage */
+  p->FileNames = NULL;
+  p->FileNameOffsets = NULL;
+
+  /* bitmap + value arrays */
+  SzBitUi64s_Init(&p->CTime);
+  SzBitUi64s_Init(&p->MTime);
+  SzBitUi32s_Init(&p->Attribs);
+  SzBitUi32s_Init(&p->CRCs);
+}
+/* Release every buffer referenced by *p (and by p->db) through alloc, then reset *p with SzArEx_Init. */
+void SzArEx_Free(CSzArEx *p, ISzAllocPtr alloc)
+{
+  ISzAlloc_Free(alloc, p->UnpackPositions);
+  ISzAlloc_Free(alloc, p->IsDirs);
+  /* folder <-> file mapping tables */
+  ISzAlloc_Free(alloc, p->FolderToFile);
+  ISzAlloc_Free(alloc, p->FileToFolder);
+  /* file name storage */
+  ISzAlloc_Free(alloc, p->FileNameOffsets);
+  ISzAlloc_Free(alloc, p->FileNames);
+  /* bitmap + value arrays; each helper also NULLs the pointers it frees */
+  SzBitUi32s_Free(&p->CRCs, alloc);
+  SzBitUi32s_Free(&p->Attribs, alloc);
+  // SzBitUi32s_Free(&p->Parents, alloc);
+  SzBitUi64s_Free(&p->MTime, alloc);
+  SzBitUi64s_Free(&p->CTime, alloc);
+  
+  SzAr_Free(&p->db, alloc);
+  SzArEx_Init(p);  /* clears the remaining counts and the now-dangling pointers */
+}
+
+/* Return 1 if testBytes begins with the k7zSignature bytes, otherwise 0. */
+static int TestSignatureCandidate(const Byte *testBytes)
+{
+  unsigned pos = 0;
+  while (pos != k7zSignatureSize && testBytes[pos] == k7zSignature[pos])
+    pos++;
+  /* pos reaches k7zSignatureSize only when every byte matched */
+  return pos == k7zSignatureSize;
+}
+
+#define SzData_Clear(p) { (p)->Data = NULL; (p)->Size = 0; }
+
+#define SZ_READ_BYTE_SD(_sd_, dest) if ((_sd_)->Size == 0) return SZ_ERROR_ARCHIVE; (_sd_)->Size--; dest = *(_sd_)->Data++;
+#define SZ_READ_BYTE(dest) SZ_READ_BYTE_SD(sd, dest)
+#define SZ_READ_BYTE_2(dest) if (sd.Size == 0) return SZ_ERROR_ARCHIVE; sd.Size--; dest = *sd.Data++;
+
+#define SKIP_DATA(sd, size) { sd->Size -= (size_t)(size); sd->Data += (size_t)(size); }
+#define SKIP_DATA2(sd, size) { sd.Size -= (size_t)(size); sd.Data += (size_t)(size); }
+
+#define SZ_READ_32(dest) if (sd.Size < 4) return SZ_ERROR_ARCHIVE; \
+   dest = GetUi32(sd.Data); SKIP_DATA2(sd, 4);
+/* Decode one variable-length number at sd into *value (1 to 9 bytes consumed); SZ_ERROR_ARCHIVE if sd runs out. */
+static MY_NO_INLINE SRes ReadNumber(CSzData *sd, UInt64 *value)
+{
+  Byte firstByte, mask;
+  unsigned i;
+  UInt32 v;
+  /* The leading 1 bits of firstByte select how many further bytes follow; its remaining low bits are the top of the value. */
+  SZ_READ_BYTE(firstByte);
+  if ((firstByte & 0x80) == 0)
+  {
+    *value = firstByte; /* single-byte form: 0..0x7F */
+    return SZ_OK;
+  }
+  SZ_READ_BYTE(v);
+  if ((firstByte & 0x40) == 0)
+  {
+    *value = (((UInt32)firstByte & 0x3F) << 8) | v; /* two-byte form: 6 bits from firstByte above the second byte */
+    return SZ_OK;
+  }
+  SZ_READ_BYTE(mask); /* mask is used as scratch for the third byte here ... */
+  *value = v | ((UInt32)mask << 8);
+  mask = 0x20; /* ... and from now on as the firstByte bit under test */
+  for (i = 2; i < 8; i++)
+  {
+    Byte b;
+    if ((firstByte & mask) == 0)
+    {
+      UInt64 highPart = (unsigned)firstByte & (unsigned)(mask - 1); /* bits below the terminating 0 bit */
+      *value |= (highPart << (8 * i));
+      return SZ_OK;
+    }
+    SZ_READ_BYTE(b);
+    *value |= ((UInt64)b << (8 * i)); /* further bytes are stored least significant first */
+    mask >>= 1;
+  }
+  return SZ_OK; /* firstByte == 0xFF: eight payload bytes were read */
+}
+
+/* Read one encoded number that must fit the 32-bit limits below; larger values give SZ_ERROR_UNSUPPORTED. */
+static MY_NO_INLINE SRes SzReadNumber32(CSzData *sd, UInt32 *value)
+{
+  Byte firstByte;
+  UInt64 value64;
+  if (sd->Size == 0)
+    return SZ_ERROR_ARCHIVE;
+  firstByte = *sd->Data; /* peek only; consumed below or by ReadNumber */
+  if ((firstByte & 0x80) == 0)
+  {
+    *value = firstByte; /* fast path: single-byte number */
+    sd->Data++;
+    sd->Size--;
+    return SZ_OK;
+  }
+  RINOK(ReadNumber(sd, &value64));
+  if (value64 >= (UInt32)0x80000000 - 1) /* accept only values below 0x7FFFFFFF */
+    return SZ_ERROR_UNSUPPORTED;
+  if (value64 >= ((UInt64)(1) << ((sizeof(size_t) - 1) * 8 + 4))) /* extra cap that depends on the width of size_t */
+    return SZ_ERROR_UNSUPPORTED;
+  *value = (UInt32)value64;
+  return SZ_OK;
+}
+
+#define ReadID(sd, value) ReadNumber(sd, value)
+/* Skip one size-prefixed field: read its size, then step over that many bytes. */
+static SRes SkipData(CSzData *sd)
+{
+  UInt64 fieldSize;
+  RINOK(ReadNumber(sd, &fieldSize));
+  /* a declared size larger than what is left is reported as a bad archive */
+  if (fieldSize <= sd->Size)
+    { SKIP_DATA(sd, fieldSize); return SZ_OK; }
+  return SZ_ERROR_ARCHIVE;
+}
+
+static SRes WaitId(CSzData *sd, UInt32 id)
+{
+  for (;;)
+  {
+    UInt64 type;
+    RINOK(ReadID(sd, &type));
+    if (type == id)
+      return SZ_OK;
+    if (type == k7zIdEnd)
+      return SZ_ERROR_ARCHIVE;
+    RINOK(SkipData(sd));
+  }
+}
+/* Point *v at a bit vector of numItems bits stored inline in sd and step over it; nothing is copied. */
+static SRes RememberBitVector(CSzData *sd, UInt32 numItems, const Byte **v)
+{
+  UInt32 numBytes = (numItems + 7) >> 3; /* bits rounded up to whole bytes */
+  if (numBytes > sd->Size)
+    return SZ_ERROR_ARCHIVE;
+  *v = sd->Data; /* borrowed pointer into the caller's input data */
+  SKIP_DATA(sd, numBytes);
+  return SZ_OK;
+}
+/* Count the 1 bits among the first numItems bits of the bitmap at bits.
+   Bits are numbered MSB first within each byte, the same layout that
+   SzBitArray_Check uses. */
+static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems)
+{
+  UInt32 count = 0;
+  UInt32 idx;
+
+  /* Index each bit directly; only bytes that hold one of the first
+     numItems bits are read, as in a byte-at-a-time scan. */
+  for (idx = 0; idx < numItems; idx++)
+  {
+    if (SzBitArray_Check(bits, idx))
+      count++;
+  }
+
+  return count;
+}
+/* Read a flag byte plus optional bitmap for numItems items into a newly allocated *v (NULL when numItems is 0). */
+static MY_NO_INLINE SRes ReadBitVector(CSzData *sd, UInt32 numItems, Byte **v, ISzAllocPtr alloc)
+{
+  Byte allAreDefined;
+  Byte *v2;
+  UInt32 numBytes = (numItems + 7) >> 3;
+  *v = NULL; /* any previous pointer is overwritten here, not freed */
+  SZ_READ_BYTE(allAreDefined); /* the flag byte is consumed even when numItems is 0 */
+  if (numBytes == 0)
+    return SZ_OK;
+  if (allAreDefined == 0) /* an explicit bitmap follows in the input: copy it out */
+  {
+    if (numBytes > sd->Size)
+      return SZ_ERROR_ARCHIVE;
+    MY_ALLOC_AND_CPY(*v, numBytes, sd->Data, alloc);
+    SKIP_DATA(sd, numBytes);
+    return SZ_OK;
+  }
+  MY_ALLOC(Byte, *v, numBytes, alloc); /* no bitmap stored: synthesize one with every item bit set */
+  v2 = *v;
+  memset(v2, 0xFF, (size_t)numBytes);
+  {
+    unsigned numBits = (unsigned)numItems & 7;
+    if (numBits != 0) /* keep only the top numBits bits of the last byte; the padding bits become 0 */
+      v2[(size_t)numBytes - 1] = (Byte)((((UInt32)1 << numBits) - 1) << (8 - numBits));
+  }
+  return SZ_OK;
+}
+/* Fill crcs->Vals (newly allocated, numItems entries) from sd2; crcs->Defs must already hold the bitmap. */
+static MY_NO_INLINE SRes ReadUi32s(CSzData *sd2, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc)
+{
+  UInt32 i;
+  CSzData sd;
+  UInt32 *vals;
+  const Byte *defs;
+  MY_ALLOC_ZE(UInt32, crcs->Vals, numItems, alloc); /* NULL when numItems is 0 */
+  sd = *sd2; /* work on a local cursor; *sd2 is only updated on success */
+  defs = crcs->Defs;
+  vals = crcs->Vals;
+  for (i = 0; i < numItems; i++)
+    if (SzBitArray_Check(defs, i)) /* dereferences defs, so it must be non-NULL when numItems > 0 */
+    {
+      SZ_READ_32(vals[i]); /* returns SZ_ERROR_ARCHIVE if fewer than 4 bytes remain */
+    }
+    else
+      vals[i] = 0; /* undefined items get 0 */
+  *sd2 = sd;
+  return SZ_OK;
+}
+/* Replace the contents of *crcs with a bitmap and 32-bit values for numItems items read from sd. */
+static SRes ReadBitUi32s(CSzData *sd, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc)
+{
+  SzBitUi32s_Free(crcs, alloc); /* release arrays left by an earlier call before overwriting the pointers */
+  RINOK(ReadBitVector(sd, numItems, &crcs->Defs, alloc));
+  return ReadUi32s(sd, numItems, crcs, alloc);
+}
+/* Step over a flag byte, its optional bitmap and the 4-byte values of the defined items, storing nothing. */
+static SRes SkipBitUi32s(CSzData *sd, UInt32 numItems)
+{
+  Byte allAreDefined;
+  UInt32 numDefined = numItems; /* default when the flag says every item is defined */
+  SZ_READ_BYTE(allAreDefined);
+  if (!allAreDefined)
+  {
+    size_t numBytes = (numItems + 7) >> 3;
+    if (numBytes > sd->Size)
+      return SZ_ERROR_ARCHIVE;
+    numDefined = CountDefinedBits(sd->Data, numItems); /* count before stepping over the bitmap */
+    SKIP_DATA(sd, numBytes);
+  }
+  if (numDefined > (sd->Size >> 2)) /* each defined item occupies 4 bytes */
+    return SZ_ERROR_ARCHIVE;
+  SKIP_DATA(sd, (size_t)numDefined * 4);
+  return SZ_OK;
+}
+/* Read the pack stream count and sizes into p->NumPackStreams / p->PackPositions, then skip to k7zIdEnd. */
+static SRes ReadPackInfo(CSzAr *p, CSzData *sd, ISzAllocPtr alloc)
+{
+  RINOK(SzReadNumber32(sd, &p->NumPackStreams));
+
+  RINOK(WaitId(sd, k7zIdSize));
+  MY_ALLOC(UInt64, p->PackPositions, (size_t)p->NumPackStreams + 1, alloc); /* one extra slot for the total */
+  {
+    UInt64 sum = 0;
+    UInt32 i;
+    UInt32 numPackStreams = p->NumPackStreams;
+    for (i = 0; i < numPackStreams; i++)
+    {
+      UInt64 packSize;
+      p->PackPositions[i] = sum; /* start offset of stream i = sum of all earlier sizes */
+      RINOK(ReadNumber(sd, &packSize));
+      sum += packSize;
+      if (sum < packSize) /* unsigned wrap-around: total does not fit in 64 bits */
+        return SZ_ERROR_ARCHIVE;
+    }
+    p->PackPositions[i] = sum; /* final entry: end of the last stream */
+  }
+
+  for (;;)
+  {
+    UInt64 type;
+    RINOK(ReadID(sd, &type));
+    if (type == k7zIdEnd)
+      return SZ_OK;
+    if (type == k7zIdCRC)
+    {
+      /* CRC of packed streams is unused now */
+      RINOK(SkipBitUi32s(sd, p->NumPackStreams));
+      continue;
+    }
+    RINOK(SkipData(sd)); /* any other property is size-prefixed and ignored */
+  }
+}
+
+/*
+static SRes SzReadSwitch(CSzData *sd)
+{
+  Byte external;
+  RINOK(SzReadByte(sd, &external));
+  return (external == 0) ? SZ_OK: SZ_ERROR_UNSUPPORTED;
+}
+*/
+
+#define k_NumCodersStreams_in_Folder_MAX (SZ_NUM_BONDS_IN_FOLDER_MAX + SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX)
+/* Parse one folder record at sd into *f: coders, bonds and pack stream indices. Returns SZ_OK or an SZ_ERROR_* code. */
+SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
+{
+  UInt32 numCoders, i;
+  UInt32 numInStreams = 0;
+  const Byte *dataStart = sd->Data; /* PropsOffset values are measured from here */
+
+  f->NumCoders = 0;
+  f->NumBonds = 0;
+  f->NumPackStreams = 0;
+  f->UnpackStream = 0;
+  
+  RINOK(SzReadNumber32(sd, &numCoders));
+  if (numCoders == 0 || numCoders > SZ_NUM_CODERS_IN_FOLDER_MAX)
+    return SZ_ERROR_UNSUPPORTED;
+  /* --- one record per coder --- */
+  for (i = 0; i < numCoders; i++)
+  {
+    Byte mainByte;
+    CSzCoderInfo *coder = f->Coders + i;
+    unsigned idSize, j;
+    UInt64 id;
+    /* mainByte: low 4 bits = id size, 0x10 = stream counts follow, 0x20 = properties follow; 0xC0 bits must be 0 */
+    SZ_READ_BYTE(mainByte);
+    if ((mainByte & 0xC0) != 0)
+      return SZ_ERROR_UNSUPPORTED;
+    
+    idSize = (unsigned)(mainByte & 0xF);
+    if (idSize > sizeof(id))
+      return SZ_ERROR_UNSUPPORTED;
+    if (idSize > sd->Size)
+      return SZ_ERROR_ARCHIVE;
+    id = 0;
+    for (j = 0; j < idSize; j++)
+    {
+      id = ((id << 8) | *sd->Data); /* id bytes are combined most significant byte first */
+      sd->Data++;
+      sd->Size--;
+    }
+    if (id > (UInt32)0xFFFFFFFF) /* MethodID is only 32 bits wide */
+      return SZ_ERROR_UNSUPPORTED;
+    coder->MethodID = (UInt32)id;
+    /* defaults for a coder record without stream counts or properties */
+    coder->NumStreams = 1;
+    coder->PropsOffset = 0;
+    coder->PropsSize = 0;
+    
+    if ((mainByte & 0x10) != 0)
+    {
+      UInt32 numStreams;
+      /* first number: input stream count */
+      RINOK(SzReadNumber32(sd, &numStreams));
+      if (numStreams > k_NumCodersStreams_in_Folder_MAX)
+        return SZ_ERROR_UNSUPPORTED;
+      coder->NumStreams = (Byte)numStreams;
+
+      RINOK(SzReadNumber32(sd, &numStreams)); /* second number: only the value 1 is accepted */
+      if (numStreams != 1)
+        return SZ_ERROR_UNSUPPORTED;
+    }
+
+    numInStreams += coder->NumStreams;
+
+    if (numInStreams > k_NumCodersStreams_in_Folder_MAX) /* keeps the streamUsed[] indexing below in bounds */
+      return SZ_ERROR_UNSUPPORTED;
+
+    if ((mainByte & 0x20) != 0)
+    {
+      UInt32 propsSize = 0;
+      RINOK(SzReadNumber32(sd, &propsSize));
+      if (propsSize > sd->Size)
+        return SZ_ERROR_ARCHIVE;
+      if (propsSize >= 0x80)
+        return SZ_ERROR_UNSUPPORTED;
+      coder->PropsOffset = sd->Data - dataStart; /* the properties are not copied, only located */
+      coder->PropsSize = (Byte)propsSize;
+      sd->Data += (size_t)propsSize;
+      sd->Size -= (size_t)propsSize;
+    }
+  }
+
+  /*
+  if (numInStreams == 1 && numCoders == 1)
+  {
+    f->NumPackStreams = 1;
+    f->PackStreams[0] = 0;
+  }
+  else
+  */
+  {
+    Byte streamUsed[k_NumCodersStreams_in_Folder_MAX];
+    UInt32 numBonds, numPackStreams;
+    /* --- bonds: one fewer than the number of coders --- */
+    numBonds = numCoders - 1;
+    if (numInStreams < numBonds)
+      return SZ_ERROR_ARCHIVE;
+    if (numBonds > SZ_NUM_BONDS_IN_FOLDER_MAX)
+      return SZ_ERROR_UNSUPPORTED;
+    f->NumBonds = numBonds;
+    /* input streams not consumed by a bond are pack streams */
+    numPackStreams = numInStreams - numBonds;
+    if (numPackStreams > SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX)
+      return SZ_ERROR_UNSUPPORTED;
+    f->NumPackStreams = numPackStreams;
+  
+    for (i = 0; i < numInStreams; i++)
+      streamUsed[i] = False;
+    
+    if (numBonds != 0)
+    {
+      Byte coderUsed[SZ_NUM_CODERS_IN_FOLDER_MAX];
+
+      for (i = 0; i < numCoders; i++)
+        coderUsed[i] = False;
+      
+      for (i = 0; i < numBonds; i++)
+      {
+        CSzBond *bp = f->Bonds + i;
+        /* each input stream may appear in at most one bond */
+        RINOK(SzReadNumber32(sd, &bp->InIndex));
+        if (bp->InIndex >= numInStreams || streamUsed[bp->InIndex])
+          return SZ_ERROR_ARCHIVE;
+        streamUsed[bp->InIndex] = True;
+        /* each coder may appear as OutIndex in at most one bond */
+        RINOK(SzReadNumber32(sd, &bp->OutIndex));
+        if (bp->OutIndex >= numCoders || coderUsed[bp->OutIndex])
+          return SZ_ERROR_ARCHIVE;
+        coderUsed[bp->OutIndex] = True;
+      }
+      /* the first coder that no bond names as OutIndex becomes UnpackStream */
+      for (i = 0; i < numCoders; i++)
+        if (!coderUsed[i])
+        {
+          f->UnpackStream = i;
+          break;
+        }
+      
+      if (i == numCoders)
+        return SZ_ERROR_ARCHIVE;
+    }
+    /* --- pack streams --- */
+    if (numPackStreams == 1)
+    {
+      for (i = 0; i < numInStreams; i++) /* not stored in the record: it is the first unbonded input stream */
+        if (!streamUsed[i])
+          break;
+      if (i == numInStreams)
+        return SZ_ERROR_ARCHIVE;
+      f->PackStreams[0] = i;
+    }
+    else
+      for (i = 0; i < numPackStreams; i++) /* stored explicitly; each index must still be unused */
+      {
+        UInt32 index;
+        RINOK(SzReadNumber32(sd, &index));
+        if (index >= numInStreams || streamUsed[index])
+          return SZ_ERROR_ARCHIVE;
+        streamUsed[index] = True;
+        f->PackStreams[i] = index;
+      }
+  }
+
+  f->NumCoders = numCoders; /* set last, so it stays 0 on every error return above */
+
+  return SZ_OK;
+}
+
+/* Step over num encoded numbers without decoding them; *sd2 is only updated when all of them were present. */
+static MY_NO_INLINE SRes SkipNumbers(CSzData *sd2, UInt32 num)
+{
+  CSzData sd;
+  sd = *sd2; /* local cursor, written back on success */
+  for (; num != 0; num--)
+  {
+    Byte firstByte, mask;
+    unsigned i;
+    SZ_READ_BYTE_2(firstByte);
+    if ((firstByte & 0x80) == 0) /* single-byte number */
+      continue;
+    if ((firstByte & 0x40) == 0) /* two-byte number: one more byte to skip */
+    {
+      if (sd.Size == 0)
+        return SZ_ERROR_ARCHIVE;
+      sd.Size--;
+      sd.Data++;
+      continue;
+    }
+    mask = 0x20;
+    for (i = 2; i < 8 && (firstByte & mask) != 0; i++) /* i ends as the number of bytes after firstByte (2..8) */
+      mask >>= 1;
+    if (i > sd.Size)
+      return SZ_ERROR_ARCHIVE;
+    SKIP_DATA2(sd, i);
+  }
+  *sd2 = sd;
+  return SZ_OK;
+}
+
+
+#define k_Scan_NumCoders_MAX 64
+#define k_Scan_NumCodersStreams_in_Folder_MAX 64
+
+/* Scan all folder records, fill the per-folder tables of *p, keep a copy of the records, then read the coder unpack sizes and folder CRCs. */
+static SRes ReadUnpackInfo(CSzAr *p,
+    CSzData *sd2,
+    UInt32 numFoldersMax,
+    const CBuf *tempBufs, UInt32 numTempBufs,
+    ISzAllocPtr alloc)
+{
+  CSzData sd; /* cursor over the folder records: either *sd2 itself or one of tempBufs */
+  
+  UInt32 fo, numFolders, numCodersOutStreams, packStreamIndex;
+  const Byte *startBufPtr;
+  Byte external;
+  
+  RINOK(WaitId(sd2, k7zIdFolder));
+  
+  RINOK(SzReadNumber32(sd2, &numFolders));
+  if (numFolders > numFoldersMax)
+    return SZ_ERROR_UNSUPPORTED;
+  p->NumFolders = numFolders;
+
+  SZ_READ_BYTE_SD(sd2, external);
+  if (external == 0)
+    sd = *sd2; /* folder records are stored inline */
+  else
+  {
+    UInt32 index; /* folder records live in tempBufs[index] */
+    RINOK(SzReadNumber32(sd2, &index));
+    if (index >= numTempBufs)
+      return SZ_ERROR_ARCHIVE;
+    sd.Data = tempBufs[index].data;
+    sd.Size = tempBufs[index].size;
+  }
+  /* the three "+ 1" tables get a closing entry after the folder loop */
+  MY_ALLOC(size_t, p->FoCodersOffsets, (size_t)numFolders + 1, alloc);
+  MY_ALLOC(UInt32, p->FoStartPackStreamIndex, (size_t)numFolders + 1, alloc);
+  MY_ALLOC(UInt32, p->FoToCoderUnpackSizes, (size_t)numFolders + 1, alloc);
+  MY_ALLOC_ZE(Byte, p->FoToMainUnpackSizeIndex, (size_t)numFolders, alloc);
+  
+  startBufPtr = sd.Data;
+  
+  packStreamIndex = 0;
+  numCodersOutStreams = 0;
+
+  for (fo = 0; fo < numFolders; fo++)
+  {
+    UInt32 numCoders, ci, numInStreams = 0;
+    
+    p->FoCodersOffsets[fo] = sd.Data - startBufPtr; /* where this folder record starts */
+    
+    RINOK(SzReadNumber32(&sd, &numCoders));
+    if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX)
+      return SZ_ERROR_UNSUPPORTED;
+    /* validate and step over each coder record; nothing is stored per coder in this pass */
+    for (ci = 0; ci < numCoders; ci++)
+    {
+      Byte mainByte;
+      unsigned idSize;
+      UInt32 coderInStreams;
+      
+      SZ_READ_BYTE_2(mainByte);
+      if ((mainByte & 0xC0) != 0)
+        return SZ_ERROR_UNSUPPORTED;
+      idSize = (mainByte & 0xF);
+      if (idSize > 8)
+        return SZ_ERROR_UNSUPPORTED;
+      if (idSize > sd.Size)
+        return SZ_ERROR_ARCHIVE;
+      SKIP_DATA2(sd, idSize);
+      
+      coderInStreams = 1; /* default when the 0x10 flag is clear */
+      
+      if ((mainByte & 0x10) != 0)
+      {
+        UInt32 coderOutStreams;
+        RINOK(SzReadNumber32(&sd, &coderInStreams));
+        RINOK(SzReadNumber32(&sd, &coderOutStreams));
+        if (coderInStreams > k_Scan_NumCodersStreams_in_Folder_MAX || coderOutStreams != 1)
+          return SZ_ERROR_UNSUPPORTED;
+      }
+      
+      numInStreams += coderInStreams;
+
+      if ((mainByte & 0x20) != 0) /* properties follow: step over them */
+      {
+        UInt32 propsSize;
+        RINOK(SzReadNumber32(&sd, &propsSize));
+        if (propsSize > sd.Size)
+          return SZ_ERROR_ARCHIVE;
+        SKIP_DATA2(sd, propsSize);
+      }
+    }
+    
+    {
+      UInt32 indexOfMainStream = 0;
+      UInt32 numPackStreams = 1;
+      /* a folder with exactly one coder and one input stream has no bond or pack stream data to read */
+      if (numCoders != 1 || numInStreams != 1)
+      {
+        Byte streamUsed[k_Scan_NumCodersStreams_in_Folder_MAX];
+        Byte coderUsed[k_Scan_NumCoders_MAX];
+    
+        UInt32 i;
+        UInt32 numBonds = numCoders - 1;
+        if (numInStreams < numBonds)
+          return SZ_ERROR_ARCHIVE;
+        /* bound check needed before streamUsed[] is indexed */
+        if (numInStreams > k_Scan_NumCodersStreams_in_Folder_MAX)
+          return SZ_ERROR_UNSUPPORTED;
+        
+        for (i = 0; i < numInStreams; i++)
+          streamUsed[i] = False;
+        for (i = 0; i < numCoders; i++)
+          coderUsed[i] = False;
+        
+        for (i = 0; i < numBonds; i++)
+        {
+          UInt32 index;
+          /* first number of a bond: input stream index, usable once */
+          RINOK(SzReadNumber32(&sd, &index));
+          if (index >= numInStreams || streamUsed[index])
+            return SZ_ERROR_ARCHIVE;
+          streamUsed[index] = True;
+          /* second number of a bond: coder index, usable once */
+          RINOK(SzReadNumber32(&sd, &index));
+          if (index >= numCoders || coderUsed[index])
+            return SZ_ERROR_ARCHIVE;
+          coderUsed[index] = True;
+        }
+        
+        numPackStreams = numInStreams - numBonds;
+        /* explicit pack stream indices are stored only when there is not exactly one */
+        if (numPackStreams != 1)
+          for (i = 0; i < numPackStreams; i++)
+          {
+            UInt32 index;
+            RINOK(SzReadNumber32(&sd, &index));
+            if (index >= numInStreams || streamUsed[index])
+              return SZ_ERROR_ARCHIVE;
+            streamUsed[index] = True;
+          }
+          
+        for (i = 0; i < numCoders; i++) /* first coder that no bond named in its second number */
+          if (!coderUsed[i])
+          {
+            indexOfMainStream = i;
+            break;
+          }
+ 
+        if (i == numCoders)
+          return SZ_ERROR_ARCHIVE;
+      }
+      /* record where this folder's pack streams and coder sizes begin */
+      p->FoStartPackStreamIndex[fo] = packStreamIndex;
+      p->FoToCoderUnpackSizes[fo] = numCodersOutStreams;
+      p->FoToMainUnpackSizeIndex[fo] = (Byte)indexOfMainStream;
+      numCodersOutStreams += numCoders;
+      if (numCodersOutStreams < numCoders) /* unsigned wrap-around */
+        return SZ_ERROR_UNSUPPORTED;
+      if (numPackStreams > p->NumPackStreams - packStreamIndex) /* more pack streams used than ReadPackInfo declared */
+        return SZ_ERROR_ARCHIVE;
+      packStreamIndex += numPackStreams;
+    }
+  }
+
+  p->FoToCoderUnpackSizes[fo] = numCodersOutStreams; /* closing entries: fo == numFolders here */
+  
+  {
+    size_t dataSize = sd.Data - startBufPtr;
+    p->FoStartPackStreamIndex[fo] = packStreamIndex;
+    p->FoCodersOffsets[fo] = dataSize;
+    MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc); /* keep a private copy of the scanned records */
+  }
+  
+  if (external != 0)
+  {
+    if (sd.Size != 0) /* the external buffer must be consumed completely */
+      return SZ_ERROR_ARCHIVE;
+    sd = *sd2; /* continue with the main header data */
+  }
+  
+  RINOK(WaitId(&sd, k7zIdCodersUnpackSize));
+  /* one unpack size per coder, over all folders */
+  MY_ALLOC_ZE(UInt64, p->CoderUnpackSizes, (size_t)numCodersOutStreams, alloc);
+  {
+    UInt32 i;
+    for (i = 0; i < numCodersOutStreams; i++)
+    {
+      RINOK(ReadNumber(&sd, p->CoderUnpackSizes + i));
+    }
+  }
+
+  for (;;)
+  {
+    UInt64 type;
+    RINOK(ReadID(&sd, &type));
+    if (type == k7zIdEnd)
+    {
+      *sd2 = sd; /* the caller's cursor is only advanced on success */
+      return SZ_OK;
+    }
+    if (type == k7zIdCRC)
+    {
+      RINOK(ReadBitUi32s(&sd, numFolders, &p->FolderCRCs, alloc));
+      continue;
+    }
+    RINOK(SkipData(&sd)); /* any other property is size-prefixed and ignored */
+  }
+}
+
+/* Return the CoderUnpackSizes entry selected for folderIndex by FoToCoderUnpackSizes plus FoToMainUnpackSizeIndex; no bounds checks. */
+UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex)
+{
+  return p->CoderUnpackSizes[p->FoToCoderUnpackSizes[folderIndex] + p->FoToMainUnpackSizeIndex[folderIndex]];
+}
+
+
+typedef struct
+{
+  UInt32 NumTotalSubStreams;  /* sum of the per-folder stream counts; equals NumFolders when none are stored */
+  UInt32 NumSubDigests;       /* number of entries ReadSubStreamsInfo expects in the CRC section */
+  CSzData sdNumSubStreams;    /* raw span of the per-folder stream counts; Data stays NULL if absent */
+  CSzData sdSizes;            /* raw span of the encoded sub-stream sizes */
+  CSzData sdCRCs;             /* raw span of the CRC section (flag byte, bitmap, values) */
+} CSubStreamInfo;
+
+/* Scan the sub-streams section: record the raw spans and totals in *ssi without decoding sizes or CRCs. *ssi spans must be cleared by the caller. */
+static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
+{
+  UInt64 type = 0;
+  UInt32 numSubDigests = 0;
+  UInt32 numFolders = p->NumFolders;
+  UInt32 numUnpackStreams = numFolders; /* default: one stream per folder */
+  UInt32 numUnpackSizesInData = 0;
+
+  for (;;)
+  {
+    RINOK(ReadID(sd, &type));
+    if (type == k7zIdNumUnpackStream)
+    {
+      UInt32 i;
+      ssi->sdNumSubStreams.Data = sd->Data;
+      numUnpackStreams = 0;
+      numSubDigests = 0;
+      for (i = 0; i < numFolders; i++)
+      {
+        UInt32 numStreams;
+        RINOK(SzReadNumber32(sd, &numStreams));
+        if (numUnpackStreams > numUnpackStreams + numStreams) /* unsigned wrap-around */
+          return SZ_ERROR_UNSUPPORTED;
+        numUnpackStreams += numStreams;
+        if (numStreams != 0)
+          numUnpackSizesInData += (numStreams - 1); /* one size fewer than streams is stored per folder */
+        if (numStreams != 1 || !SzBitWithVals_Check(&p->FolderCRCs, i)) /* a single-stream folder with a defined folder CRC needs no extra digest */
+          numSubDigests += numStreams;
+      }
+      ssi->sdNumSubStreams.Size = sd->Data - ssi->sdNumSubStreams.Data;
+      continue;
+    }
+    if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd)
+      break;
+    RINOK(SkipData(sd));
+  }
+
+  if (!ssi->sdNumSubStreams.Data) /* no stream counts were stored: one stream per folder */
+  {
+    numSubDigests = numFolders;
+    if (p->FolderCRCs.Defs)
+      numSubDigests = numFolders - CountDefinedBits(p->FolderCRCs.Defs, numFolders);
+  }
+  
+  ssi->NumTotalSubStreams = numUnpackStreams;
+  ssi->NumSubDigests = numSubDigests;
+
+  if (type == k7zIdSize)
+  {
+    ssi->sdSizes.Data = sd->Data;
+    RINOK(SkipNumbers(sd, numUnpackSizesInData)); /* sizes stay encoded; only their span is remembered */
+    ssi->sdSizes.Size = sd->Data - ssi->sdSizes.Data;
+    RINOK(ReadID(sd, &type));
+  }
+
+  for (;;)
+  {
+    if (type == k7zIdEnd)
+      return SZ_OK;
+    if (type == k7zIdCRC)
+    {
+      ssi->sdCRCs.Data = sd->Data;
+      RINOK(SkipBitUi32s(sd, numSubDigests)); /* validated and skipped; only the span is remembered */
+      ssi->sdCRCs.Size = sd->Data - ssi->sdCRCs.Data;
+    }
+    else
+    {
+      RINOK(SkipData(sd));
+    }
+    RINOK(ReadID(sd, &type));
+  }
+}
+/* Read the optional PackInfo, UnpackInfo and SubStreamsInfo sections in that order; the section list must end with k7zIdEnd. */
+static SRes SzReadStreamsInfo(CSzAr *p,
+    CSzData *sd,
+    UInt32 numFoldersMax, const CBuf *tempBufs, UInt32 numTempBufs,
+    UInt64 *dataOffset,
+    CSubStreamInfo *ssi,
+    ISzAllocPtr alloc)
+{
+  UInt64 type;
+
+  SzData_Clear(&ssi->sdSizes);
+  SzData_Clear(&ssi->sdCRCs);
+  SzData_Clear(&ssi->sdNumSubStreams); /* ReadSubStreamsInfo tests this Data pointer for NULL */
+
+  *dataOffset = 0; /* stays 0 when no PackInfo section is present */
+  RINOK(ReadID(sd, &type));
+  if (type == k7zIdPackInfo)
+  {
+    RINOK(ReadNumber(sd, dataOffset));
+    RINOK(ReadPackInfo(p, sd, alloc));
+    RINOK(ReadID(sd, &type));
+  }
+  if (type == k7zIdUnpackInfo)
+  {
+    RINOK(ReadUnpackInfo(p, sd, numFoldersMax, tempBufs, numTempBufs, alloc));
+    RINOK(ReadID(sd, &type));
+  }
+  if (type == k7zIdSubStreamsInfo)
+  {
+    RINOK(ReadSubStreamsInfo(p, sd, ssi));
+    RINOK(ReadID(sd, &type));
+  }
+  else
+  {
+    ssi->NumTotalSubStreams = p->NumFolders; /* NumSubDigests is left untouched on this path */
+    // ssi->NumSubDigests = 0;
+  }
+
+  return (type == k7zIdEnd ? SZ_OK : SZ_ERROR_UNSUPPORTED);
+}
+/* Read a streams-info section into *p and decode each of its folders into tempBufs[folder]; an archive with no folders is an error. */
+static SRes SzReadAndDecodePackedStreams(
+    ILookInStream *inStream,
+    CSzData *sd,
+    CBuf *tempBufs,
+    UInt32 numFoldersMax,
+    UInt64 baseOffset,
+    CSzAr *p,
+    ISzAllocPtr allocTemp)
+{
+  UInt64 dataStartPos;
+  UInt32 fo;
+  CSubStreamInfo ssi;
+
+  RINOK(SzReadStreamsInfo(p, sd, numFoldersMax, NULL, 0, &dataStartPos, &ssi, allocTemp));
+  
+  dataStartPos += baseOffset; /* the stored offset is relative to baseOffset */
+  if (p->NumFolders == 0)
+    return SZ_ERROR_ARCHIVE;
+ 
+  for (fo = 0; fo < p->NumFolders; fo++)
+    Buf_Init(tempBufs + fo); /* initialise all buffers first, before any Buf_Create below can fail */
+  
+  for (fo = 0; fo < p->NumFolders; fo++)
+  {
+    CBuf *tempBuf = tempBufs + fo;
+    UInt64 unpackSize = SzAr_GetFolderUnpackSize(p, fo);
+    if ((size_t)unpackSize != unpackSize) /* size does not fit in size_t */
+      return SZ_ERROR_MEM;
+    if (!Buf_Create(tempBuf, (size_t)unpackSize, allocTemp))
+      return SZ_ERROR_MEM;
+  }
+  
+  for (fo = 0; fo < p->NumFolders; fo++)
+  {
+    const CBuf *tempBuf = tempBufs + fo;
+    RINOK(LookInStream_SeekTo(inStream, dataStartPos));
+    RINOK(SzAr_DecodeFolder(p, fo, inStream, dataStartPos, tempBuf->data, tempBuf->size, allocTemp));
+  }
+  
+  return SZ_OK;
+}
+/* Index numFiles zero-terminated UTF-16 names in data[0..size): writes numFiles + 1 offsets (in 2-byte units) to offsets; the names must fill size exactly. */
+static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size_t *offsets)
+{
+  size_t pos = 0;
+  *offsets++ = 0;
+  if (numFiles == 0)
+    return (size == 0) ? SZ_OK : SZ_ERROR_ARCHIVE;
+  if (size < 2 || (size & 1) != 0) /* odd size: the 2-byte scan below could step past the terminator and read data[size] */
+    return SZ_ERROR_ARCHIVE;
+  if (data[size - 2] != 0 || data[size - 1] != 0) /* a final terminator guarantees that the scan stops inside the buffer */
+    return SZ_ERROR_ARCHIVE;
+  do
+  {
+    const Byte *p;
+    if (pos == size) /* fewer names than numFiles */
+      return SZ_ERROR_ARCHIVE;
+    for (p = data + pos;
+      #ifdef _WIN32
+      *(const UInt16 *)p != 0
+      #else
+      p[0] != 0 || p[1] != 0
+      #endif
+      ; p += 2);
+    pos = p - data + 2; /* first byte after this name's terminator */
+    *offsets++ = (pos >> 1);
+  }
+  while (--numFiles);
+  return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE; /* bytes left after the last name are an error */
+}
+/* Read a bitmap plus 8-byte time values for num items into *p; the values come from sd2 or, when flagged external, from tempBufs. */
+static MY_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num,
+    CSzData *sd2,
+    const CBuf *tempBufs, UInt32 numTempBufs,
+    ISzAllocPtr alloc)
+{
+  CSzData sd;
+  UInt32 i;
+  CNtfsFileTime *vals;
+  Byte *defs;
+  Byte external;
+  SzBitUi64s_Free(p, alloc); /* as in ReadBitUi32s: release arrays from an earlier call so a repeated property cannot leak */
+  RINOK(ReadBitVector(sd2, num, &p->Defs, alloc));
+  
+  SZ_READ_BYTE_SD(sd2, external);
+  if (external == 0)
+    sd = *sd2; /* values are stored inline */
+  else
+  {
+    UInt32 index; /* values live in tempBufs[index] */
+    RINOK(SzReadNumber32(sd2, &index));
+    if (index >= numTempBufs)
+      return SZ_ERROR_ARCHIVE;
+    sd.Data = tempBufs[index].data;
+    sd.Size = tempBufs[index].size;
+  }
+  
+  MY_ALLOC_ZE(CNtfsFileTime, p->Vals, num, alloc); /* NULL when num is 0 */
+  vals = p->Vals;
+  defs = p->Defs;
+  for (i = 0; i < num; i++)
+    if (SzBitArray_Check(defs, i))
+    {
+      if (sd.Size < 8)
+        return SZ_ERROR_ARCHIVE;
+      vals[i].Low = GetUi32(sd.Data);
+      vals[i].High = GetUi32(sd.Data + 4);
+      SKIP_DATA2(sd, 8);
+    }
+    else
+      vals[i].High = vals[i].Low = 0; /* undefined items get a zero time */
+  
+  if (external == 0)
+    *sd2 = sd; /* advance the caller's cursor only when the values were read from it */
+  
+  return SZ_OK;
+}
+
+
+#define NUM_ADDITIONAL_STREAMS_MAX 8
+
+
+/* Parses the plain (already decoded) 7z header in *sd into *p.
+   Handles, in order: optional ArchiveProperties (skipped), optional
+   AdditionalStreamsInfo (decoded into tempBufs, count stored in *numTempBufs),
+   optional MainStreamsInfo, then FilesInfo.  The per-file tables
+   (FileToFolder, FolderToFile, UnpackPositions, IsDirs, CRCs) are built last.
+   Nothing stored in *p is released here on failure; that is left to the
+   caller.  Returns SZ_OK or an SZ_ERROR_* code. */
+static SRes SzReadHeader2(
+    CSzArEx *p,   /* allocMain */
+    CSzData *sd,
+    ILookInStream *inStream,
+    CBuf *tempBufs, UInt32 *numTempBufs,
+    ISzAllocPtr allocMain,
+    ISzAllocPtr allocTemp
+    )
+{
+  CSubStreamInfo ssi;
+
+{
+  UInt64 type;
+
+  SzData_Clear(&ssi.sdSizes);
+  SzData_Clear(&ssi.sdCRCs);
+  SzData_Clear(&ssi.sdNumSubStreams);
+
+  ssi.NumSubDigests = 0;
+  ssi.NumTotalSubStreams = 0;
+
+  RINOK(ReadID(sd, &type));
+
+  if (type == k7zIdArchiveProperties)
+  {
+    for (;;)
+    {
+      UInt64 type2;
+      RINOK(ReadID(sd, &type2));
+      if (type2 == k7zIdEnd)
+        break;
+      RINOK(SkipData(sd));
+    }
+    RINOK(ReadID(sd, &type));
+  }
+
+  if (type == k7zIdAdditionalStreamsInfo)
+  {
+    CSzAr tempAr;
+    SRes res;
+
+    SzAr_Init(&tempAr);
+    res = SzReadAndDecodePackedStreams(inStream, sd, tempBufs, NUM_ADDITIONAL_STREAMS_MAX,
+        p->startPosAfterHeader, &tempAr, allocTemp);
+    *numTempBufs = tempAr.NumFolders;
+    SzAr_Free(&tempAr, allocTemp);
+
+    if (res != SZ_OK)
+      return res;
+    RINOK(ReadID(sd, &type));
+  }
+
+  if (type == k7zIdMainStreamsInfo)
+  {
+    RINOK(SzReadStreamsInfo(&p->db, sd, (UInt32)1 << 30, tempBufs, *numTempBufs,
+        &p->dataPos, &ssi, allocMain));
+    p->dataPos += p->startPosAfterHeader;
+    RINOK(ReadID(sd, &type));
+  }
+
+  if (type == k7zIdEnd)
+  {
+    return SZ_OK;
+  }
+
+  if (type != k7zIdFilesInfo)
+    return SZ_ERROR_ARCHIVE;
+}
+
+{
+  UInt32 numFiles = 0;
+  UInt32 numEmptyStreams = 0;
+  const Byte *emptyStreams = NULL;
+  const Byte *emptyFiles = NULL;
+
+  RINOK(SzReadNumber32(sd, &numFiles));
+  p->NumFiles = numFiles;
+
+  for (;;)
+  {
+    UInt64 type;
+    UInt64 size;
+    RINOK(ReadID(sd, &type));
+    if (type == k7zIdEnd)
+      break;
+    RINOK(ReadNumber(sd, &size));
+    if (size > sd->Size)
+      return SZ_ERROR_ARCHIVE;
+
+    if (type >= ((UInt32)1 << 8))
+    {
+      SKIP_DATA(sd, size);
+    }
+    else switch ((unsigned)type)
+    {
+      case k7zIdName:
+      {
+        size_t namesSize;
+        const Byte *namesData;
+        Byte external;
+
+        SZ_READ_BYTE(external);
+        if (external == 0)
+        {
+          namesSize = (size_t)size - 1;
+          namesData = sd->Data;
+        }
+        else
+        {
+          UInt32 index;
+          RINOK(SzReadNumber32(sd, &index));
+          if (index >= *numTempBufs)
+            return SZ_ERROR_ARCHIVE;
+          namesData = (tempBufs)[index].data;
+          namesSize = (tempBufs)[index].size;
+        }
+
+        /* Names are stored as 16-bit units, so the byte count must be even. */
+        if ((namesSize & 1) != 0)
+          return SZ_ERROR_ARCHIVE;
+        MY_ALLOC(size_t, p->FileNameOffsets, numFiles + 1, allocMain);
+        MY_ALLOC_ZE_AND_CPY(p->FileNames, namesSize, namesData, allocMain);
+        RINOK(SzReadFileNames(p->FileNames, namesSize, numFiles, p->FileNameOffsets))
+        if (external == 0)
+        {
+          SKIP_DATA(sd, namesSize);
+        }
+        break;
+      }
+      case k7zIdEmptyStream:
+      {
+        RINOK(RememberBitVector(sd, numFiles, &emptyStreams));
+        numEmptyStreams = CountDefinedBits(emptyStreams, numFiles);
+        emptyFiles = NULL;
+        break;
+      }
+      case k7zIdEmptyFile:
+      {
+        RINOK(RememberBitVector(sd, numEmptyStreams, &emptyFiles));
+        break;
+      }
+      case k7zIdWinAttrib:
+      {
+        Byte external;
+        CSzData sdSwitch;
+        CSzData *sdPtr;
+        SzBitUi32s_Free(&p->Attribs, allocMain);
+        RINOK(ReadBitVector(sd, numFiles, &p->Attribs.Defs, allocMain));
+
+        SZ_READ_BYTE(external);
+        if (external == 0)
+          sdPtr = sd;
+        else
+        {
+          UInt32 index;
+          RINOK(SzReadNumber32(sd, &index));
+          if (index >= *numTempBufs)
+            return SZ_ERROR_ARCHIVE;
+          sdSwitch.Data = (tempBufs)[index].data;
+          sdSwitch.Size = (tempBufs)[index].size;
+          sdPtr = &sdSwitch;
+        }
+        RINOK(ReadUi32s(sdPtr, numFiles, &p->Attribs, allocMain));
+        break;
+      }
+      case k7zIdMTime: RINOK(ReadTime(&p->MTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)); break;
+      case k7zIdCTime: RINOK(ReadTime(&p->CTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)); break;
+      default:
+      {
+        SKIP_DATA(sd, size);
+      }
+    }
+  }
+
+  /* Every file that is not an empty stream must map to exactly one sub-stream. */
+  if (numFiles - numEmptyStreams != ssi.NumTotalSubStreams)
+    return SZ_ERROR_ARCHIVE;
+
+  /* Skip whatever properties follow FilesInfo, up to the closing End marker. */
+  for (;;)
+  {
+    UInt64 type;
+    RINOK(ReadID(sd, &type));
+    if (type == k7zIdEnd)
+      break;
+    RINOK(SkipData(sd));
+  }
+
+  {
+    UInt32 i;
+    UInt32 emptyFileIndex = 0;
+    UInt32 folderIndex = 0;
+    UInt32 remSubStreams = 0;
+    UInt32 numSubStreams = 0;
+    UInt64 unpackPos = 0;
+    const Byte *digestsDefs = NULL;
+    const Byte *digestsVals = NULL;
+    UInt32 digestsValsIndex = 0;
+    UInt32 digestIndex;
+    Byte allDigestsDefined = 0;
+    Byte isDirMask = 0;
+    Byte crcMask = 0;
+    Byte mask = 0x80;
+
+    MY_ALLOC(UInt32, p->FolderToFile, p->db.NumFolders + 1, allocMain);
+    MY_ALLOC_ZE(UInt32, p->FileToFolder, p->NumFiles, allocMain);
+    MY_ALLOC(UInt64, p->UnpackPositions, p->NumFiles + 1, allocMain);
+    MY_ALLOC_ZE(Byte, p->IsDirs, (p->NumFiles + 7) >> 3, allocMain);
+
+    RINOK(SzBitUi32s_Alloc(&p->CRCs, p->NumFiles, allocMain));
+
+    /* Sub-stream digests: an "all defined" byte, then (unless it is set) a bit
+       vector of NumSubDigests bits, then the 32-bit CRC values. */
+    if (ssi.sdCRCs.Size != 0)
+    {
+      SZ_READ_BYTE_SD(&ssi.sdCRCs, allDigestsDefined);
+      if (allDigestsDefined)
+        digestsVals = ssi.sdCRCs.Data;
+      else
+      {
+        size_t numBytes = (ssi.NumSubDigests + 7) >> 3;
+        digestsDefs = ssi.sdCRCs.Data;
+        digestsVals = digestsDefs + numBytes;
+      }
+    }
+
+    digestIndex = 0;
+
+    for (i = 0; i < numFiles; i++, mask >>= 1)
+    {
+      if (mask == 0)
+      {
+        /* A full group of 8 files is done: store the accumulated bit masks. */
+        UInt32 byteIndex = (i - 1) >> 3;
+        p->IsDirs[byteIndex] = isDirMask;
+        p->CRCs.Defs[byteIndex] = crcMask;
+        isDirMask = 0;
+        crcMask = 0;
+        mask = 0x80;
+      }
+
+      p->UnpackPositions[i] = unpackPos;
+      p->CRCs.Vals[i] = 0;
+
+      if (emptyStreams && SzBitArray_Check(emptyStreams, i))
+      {
+        if (emptyFiles)
+        {
+          if (!SzBitArray_Check(emptyFiles, emptyFileIndex))
+            isDirMask |= mask;
+          emptyFileIndex++;
+        }
+        else
+          isDirMask |= mask;
+        if (remSubStreams == 0)
+        {
+          p->FileToFolder[i] = (UInt32)-1;
+          continue;
+        }
+      }
+
+      /* Open the next folder, passing over folders with zero sub-streams. */
+      if (remSubStreams == 0)
+      {
+        for (;;)
+        {
+          if (folderIndex >= p->db.NumFolders)
+            return SZ_ERROR_ARCHIVE;
+          p->FolderToFile[folderIndex] = i;
+          numSubStreams = 1;
+          if (ssi.sdNumSubStreams.Data)
+          {
+            RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams));
+          }
+          remSubStreams = numSubStreams;
+          if (numSubStreams != 0)
+            break;
+          {
+            UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex);
+            unpackPos += folderUnpackSize;
+            if (unpackPos < folderUnpackSize)
+              return SZ_ERROR_ARCHIVE;
+          }
+
+          folderIndex++;
+        }
+      }
+
+      p->FileToFolder[i] = folderIndex;
+
+      if (emptyStreams && SzBitArray_Check(emptyStreams, i))
+        continue;
+
+      /* Last sub-stream of a folder: it ends where the folder's data ends. */
+      if (--remSubStreams == 0)
+      {
+        UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex);
+        UInt64 startFolderUnpackPos = p->UnpackPositions[p->FolderToFile[folderIndex]];
+        if (folderUnpackSize < unpackPos - startFolderUnpackPos)
+          return SZ_ERROR_ARCHIVE;
+        unpackPos = startFolderUnpackPos + folderUnpackSize;
+        if (unpackPos < folderUnpackSize)
+          return SZ_ERROR_ARCHIVE;
+
+        /* FolderCRCs is per folder, so test it with folderIndex, not with i. */
+        if (numSubStreams == 1 && SzBitWithVals_Check(&p->db.FolderCRCs, folderIndex))
+        {
+          p->CRCs.Vals[i] = p->db.FolderCRCs.Vals[folderIndex];
+          crcMask |= mask;
+          folderIndex++;
+          continue;
+        }
+        folderIndex++;
+      }
+      else
+      {
+        UInt64 v;
+        RINOK(ReadNumber(&ssi.sdSizes, &v));
+        unpackPos += v;
+        if (unpackPos < v)
+          return SZ_ERROR_ARCHIVE;
+      }
+
+      /* This sub-stream owns one slot of the digest table; the slot is consumed
+         whether or not a CRC is present, so later files test the right bit. */
+      if (allDigestsDefined || (digestsDefs && SzBitArray_Check(digestsDefs, digestIndex)))
+      {
+        p->CRCs.Vals[i] = GetUi32(digestsVals + (size_t)digestsValsIndex * 4);
+        digestsValsIndex++;
+        crcMask |= mask;
+      }
+      digestIndex++;
+    }
+
+    /* Store the final, partially filled group of bits. */
+    if (mask != 0x80)
+    {
+      UInt32 byteIndex = (i - 1) >> 3;
+      p->IsDirs[byteIndex] = isDirMask;
+      p->CRCs.Defs[byteIndex] = crcMask;
+    }
+
+    p->UnpackPositions[i] = unpackPos;
+
+    if (remSubStreams != 0)
+      return SZ_ERROR_ARCHIVE;
+
+    /* Folders after the last file must declare zero sub-streams; their unpack
+       size is not added to unpackPos. */
+    for (;;)
+    {
+      p->FolderToFile[folderIndex] = i;
+      if (folderIndex >= p->db.NumFolders)
+        break;
+      if (!ssi.sdNumSubStreams.Data)
+        return SZ_ERROR_ARCHIVE;
+      RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams));
+      if (numSubStreams != 0)
+        return SZ_ERROR_ARCHIVE;
+      folderIndex++;
+    }
+
+    if (ssi.sdNumSubStreams.Data && ssi.sdNumSubStreams.Size != 0)
+      return SZ_ERROR_ARCHIVE;
+  }
+}
+  return SZ_OK;
+}
+
+
+/* Runs SzReadHeader2 with a local set of scratch buffers for the additional
+   streams, frees those buffers whatever the outcome, and finally requires
+   that a successful parse consumed the header data completely. */
+static SRes SzReadHeader(
+    CSzArEx *p,
+    CSzData *sd,
+    ILookInStream *inStream,
+    ISzAllocPtr allocMain,
+    ISzAllocPtr allocTemp)
+{
+  CBuf tempBufs[NUM_ADDITIONAL_STREAMS_MAX];
+  UInt32 numTempBufs = 0;
+  UInt32 k;
+  SRes res;
+
+  for (k = 0; k != NUM_ADDITIONAL_STREAMS_MAX; k++)
+    Buf_Init(&tempBufs[k]);
+
+  res = SzReadHeader2(p, sd, inStream, tempBufs, &numTempBufs,
+      allocMain, allocTemp);
+
+  for (k = 0; k != NUM_ADDITIONAL_STREAMS_MAX; k++)
+    Buf_Free(&tempBufs[k], allocTemp);
+
+  RINOK(res);
+
+  /* Bytes left over after a successful parse are reported as SZ_ERROR_FAIL. */
+  return (sd->Size != 0) ? SZ_ERROR_FAIL : res;
+}
+
+/* Reads and validates the start header at the current stream position, loads
+   the "next header" it points to, decodes it first when it is stored as an
+   EncodedHeader, and hands the plain header to SzReadHeader.
+   Returns SZ_OK without further parsing when the next header size is 0. */
+static SRes SzArEx_Open2(
+    CSzArEx *p,
+    ILookInStream *inStream,
+    ISzAllocPtr allocMain,
+    ISzAllocPtr allocTemp)
+{
+  Byte header[k7zStartHeaderSize];
+  Int64 startArcPos;
+  UInt64 nextHeaderOffset, nextHeaderSize;
+  size_t nextHeaderSizeT;
+  UInt32 nextHeaderCRC;
+  CBuf buf;
+  SRes res;
+
+  startArcPos = 0; /* seek by 0 from the current position to learn where the archive starts */
+  RINOK(ILookInStream_Seek(inStream, &startArcPos, SZ_SEEK_CUR));
+  RINOK(LookInStream_Read2(inStream, header, k7zStartHeaderSize, SZ_ERROR_NO_ARCHIVE));
+
+  if (!TestSignatureCandidate(header))
+    return SZ_ERROR_NO_ARCHIVE;
+  if (header[6] != k7zMajorVersion)
+    return SZ_ERROR_UNSUPPORTED;
+  /* Start header fields: offset, size and CRC of the next header. */
+  nextHeaderOffset = GetUi64(header + 12);
+  nextHeaderSize = GetUi64(header + 20);
+  nextHeaderCRC = GetUi32(header + 28);
+
+  p->startPosAfterHeader = startArcPos + k7zStartHeaderSize;
+  if (CrcCalc(header + 12, 20) != GetUi32(header + 8)) /* CRC of the 20 bytes at offset 12 is stored at offset 8 */
+    return SZ_ERROR_CRC;
+
+  nextHeaderSizeT = (size_t)nextHeaderSize;
+  if (nextHeaderSizeT != nextHeaderSize) /* size does not fit in size_t */
+    return SZ_ERROR_MEM;
+  if (nextHeaderSizeT == 0) /* no next header: nothing further is parsed */
+    return SZ_OK;
+  if (nextHeaderOffset > nextHeaderOffset + nextHeaderSize ||
+      nextHeaderOffset > nextHeaderOffset + nextHeaderSize + k7zStartHeaderSize)
+    return SZ_ERROR_NO_ARCHIVE;
+  /* The next header must lie entirely within the stream. */
+  {
+    Int64 pos = 0;
+    RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END));
+    if ((UInt64)pos < startArcPos + nextHeaderOffset ||
+        (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset ||
+        (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize)
+      return SZ_ERROR_INPUT_EOF;
+  }
+
+  RINOK(LookInStream_SeekTo(inStream, startArcPos + k7zStartHeaderSize + nextHeaderOffset));
+
+  if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp))
+    return SZ_ERROR_MEM;
+
+  res = LookInStream_Read(inStream, buf.data, nextHeaderSizeT);
+
+  if (res == SZ_OK)
+  {
+    res = SZ_ERROR_ARCHIVE; /* result if the CRC below does not match */
+    if (CrcCalc(buf.data, nextHeaderSizeT) == nextHeaderCRC)
+    {
+      CSzData sd;
+      UInt64 type;
+      sd.Data = buf.data;
+      sd.Size = buf.size;
+      res = ReadID(&sd, &type);
+      /* An EncodedHeader only describes packed streams that hold the real header. */
+      if (res == SZ_OK && type == k7zIdEncodedHeader)
+      {
+        CSzAr tempAr;
+        CBuf tempBuf;
+        Buf_Init(&tempBuf);
+        SzAr_Init(&tempAr);
+        res = SzReadAndDecodePackedStreams(inStream, &sd, &tempBuf, 1, p->startPosAfterHeader, &tempAr, allocTemp);
+        SzAr_Free(&tempAr, allocTemp);
+
+        if (res != SZ_OK)
+        {
+          Buf_Free(&tempBuf, allocTemp);
+        }
+        else
+        {
+          Buf_Free(&buf, allocTemp);
+          buf.data = tempBuf.data; /* buf now holds the decoded header and is freed at the end */
+          buf.size = tempBuf.size;
+          sd.Data = buf.data;
+          sd.Size = buf.size;
+          res = ReadID(&sd, &type);
+        }
+      }
+      /* sd now covers the plain header, whether read directly or decoded. */
+      if (res == SZ_OK)
+      {
+        if (type == k7zIdHeader)
+        {
+          /*
+          CSzData sd2;
+          unsigned ttt;
+          for (ttt = 0; ttt < 40000; ttt++)
+          {
+            SzArEx_Free(p, allocMain);
+            sd2 = sd;
+            res = SzReadHeader(p, &sd2, inStream, allocMain, allocTemp);
+            if (res != SZ_OK)
+              break;
+          }
+          */
+          res = SzReadHeader(p, &sd, inStream, allocMain, allocTemp);
+        }
+        else
+          res = SZ_ERROR_UNSUPPORTED;
+      }
+    }
+  }
+
+  Buf_Free(&buf, allocTemp);
+  return res;
+}
+
+
+/* Opens an archive; on failure frees whatever was allocated into *p. */
+SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream,
+    ISzAllocPtr allocMain, ISzAllocPtr allocTemp)
+{
+  const SRes status = SzArEx_Open2(p, inStream, allocMain, allocTemp);
+  if (status != SZ_OK) SzArEx_Free(p, allocMain);
+  return status;
+}
+
+
+/* Extracts one file by decoding, or reusing, the folder that contains it.
+   *tempBuf, *blockIndex and *outBufferSize form a one-folder cache kept by the
+   caller: if *blockIndex already names the file's folder the buffer is reused,
+   otherwise it is freed and reallocated with allocMain.  On success *offset
+   and *outSizeProcessed locate the file inside *tempBuf.  When allocation or
+   decoding fails the buffer is released and *tempBuf is NULL, so a later call
+   decodes again instead of serving stale bytes. */
+SRes SzArEx_Extract(
+    const CSzArEx *p,
+    ILookInStream *inStream,
+    UInt32 fileIndex,
+    UInt32 *blockIndex,
+    Byte **tempBuf,
+    size_t *outBufferSize,
+    size_t *offset,
+    size_t *outSizeProcessed,
+    ISzAllocPtr allocMain,
+    ISzAllocPtr allocTemp)
+{
+  UInt32 folderIndex = p->FileToFolder[fileIndex];
+  SRes res = SZ_OK;
+
+  *offset = 0;
+  *outSizeProcessed = 0;
+
+  /* (UInt32)-1 marks a file that has no data stream, hence no folder. */
+  if (folderIndex == (UInt32)-1)
+  {
+    ISzAlloc_Free(allocMain, *tempBuf);
+    *blockIndex = folderIndex;
+    *tempBuf = NULL;
+    *outBufferSize = 0;
+    return SZ_OK;
+  }
+
+  if (*tempBuf == NULL || *blockIndex != folderIndex)
+  {
+    UInt64 unpackSizeSpec = SzAr_GetFolderUnpackSize(&p->db, folderIndex);
+    size_t unpackSize = (size_t)unpackSizeSpec;
+    if (unpackSize != unpackSizeSpec)
+      return SZ_ERROR_MEM;
+    *blockIndex = folderIndex;
+    ISzAlloc_Free(allocMain, *tempBuf);
+    *tempBuf = NULL;
+    *outBufferSize = unpackSize;
+    if (unpackSize != 0)
+    {
+      *tempBuf = (Byte *)ISzAlloc_Alloc(allocMain, unpackSize);
+      if (*tempBuf == NULL)
+        res = SZ_ERROR_MEM;
+    }
+    if (res == SZ_OK)
+      res = SzAr_DecodeFolder(&p->db, folderIndex,
+          inStream, p->dataPos, *tempBuf, unpackSize, allocTemp);
+    if (res != SZ_OK)
+    {
+      ISzAlloc_Free(allocMain, *tempBuf);
+      *tempBuf = NULL;
+      *outBufferSize = 0;
+      return res;
+    }
+  }
+
+  {
+    UInt64 unpackPos = p->UnpackPositions[fileIndex];
+    *offset = (size_t)(unpackPos - p->UnpackPositions[p->FolderToFile[folderIndex]]);
+    *outSizeProcessed = (size_t)(p->UnpackPositions[(size_t)fileIndex + 1] - unpackPos);
+    if (*offset + *outSizeProcessed > *outBufferSize)
+      return SZ_ERROR_FAIL;
+    if (SzBitWithVals_Check(&p->CRCs, fileIndex))
+      if (CrcCalc(*tempBuf + *offset, *outSizeProcessed) != p->CRCs.Vals[fileIndex])
+        res = SZ_ERROR_CRC;
+  }
+  return res;
+}
+
+
+/* Returns the length of the file's name in 16-bit units, taken from the
+   offsets table; the units are also copied to dest unless dest is NULL. */
+size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest)
+{
+  const size_t first = p->FileNameOffsets[fileIndex];
+  const size_t count = p->FileNameOffsets[fileIndex + 1] - first;
+  size_t k;
+  if (dest == NULL)
+    return count;
+  for (k = 0; k < count; k++)
+    dest[k] = GetUi16(p->FileNames + (first + k) * 2);
+  return count;
+}
+
+/*
+size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex)
+{
+  size_t len;
+  if (!p->FileNameOffsets)
+    return 1;
+  len = 0;
+  for (;;)
+  {
+    UInt32 parent = (UInt32)(Int32)-1;
+    len += p->FileNameOffsets[fileIndex + 1] - p->FileNameOffsets[fileIndex];
+    if SzBitWithVals_Check(&p->Parents, fileIndex)
+      parent = p->Parents.Vals[fileIndex];
+    if (parent == (UInt32)(Int32)-1)
+      return len;
+    fileIndex = parent;
+  }
+}
+
+UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest)
+{
+  BoolInt needSlash;
+  if (!p->FileNameOffsets)
+  {
+    *(--dest) = 0;
+    return dest;
+  }
+  needSlash = False;
+  for (;;)
+  {
+    UInt32 parent = (UInt32)(Int32)-1;
+    size_t curLen = p->FileNameOffsets[fileIndex + 1] - p->FileNameOffsets[fileIndex];
+    SzArEx_GetFileNameUtf16(p, fileIndex, dest - curLen);
+    if (needSlash)
+      *(dest - 1) = '/';
+    needSlash = True;
+    dest -= curLen;
+
+    if SzBitWithVals_Check(&p->Parents, fileIndex)
+      parent = p->Parents.Vals[fileIndex];
+    if (parent == (UInt32)(Int32)-1)
+      return dest;
+    fileIndex = parent;
+  }
+}
+*/
diff --git a/libraries/lzma/C/7zBuf.c b/libraries/lzma/C/7zBuf.c
new file mode 100644
index 000000000..8865c32a8
--- /dev/null
+++ b/libraries/lzma/C/7zBuf.c
@@ -0,0 +1,36 @@
+/* 7zBuf.c -- Byte Buffer
+2017-04-03 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "7zBuf.h"
+
+void Buf_Init(CBuf *p) /* mark *p as empty; nothing is freed */
+{
+  p->size = 0;
+  p->data = 0;
+}
+
+/* Gives *p a newly allocated block of `size` bytes (no block when size is 0).
+   Returns 1 on success and 0 when the allocator fails.  Whatever p->data
+   held before is overwritten without being freed. */
+int Buf_Create(CBuf *p, size_t size, ISzAllocPtr alloc)
+{
+  Byte *block = 0;
+
+  if (size != 0)
+    block = (Byte *)ISzAlloc_Alloc(alloc, size);
+
+  /* A failed allocation leaves the buffer empty: data == 0, size == 0. */
+  p->data = block;
+  p->size = block ? size : 0;
+
+  return (size == 0 || block) ? 1 : 0;
+}
+
+/* Returns the storage to alloc and puts *p back into the empty state. */
+void Buf_Free(CBuf *p, ISzAllocPtr alloc)
+{
+  ISzAlloc_Free(alloc, p->data);
+  Buf_Init(p);
+}
diff --git a/libraries/lzma/C/7zBuf.h b/libraries/lzma/C/7zBuf.h
new file mode 100644
index 000000000..81d1b5b64
--- /dev/null
+++ b/libraries/lzma/C/7zBuf.h
@@ -0,0 +1,35 @@
+/* 7zBuf.h -- Byte Buffer
+2017-04-03 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_BUF_H
+#define __7Z_BUF_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+typedef struct
+{
+  Byte *data;   /* allocated storage, or 0 when the buffer is empty */
+  size_t size;  /* number of bytes available at data */
+} CBuf;
+
+void Buf_Init(CBuf *p); /* sets data = 0 and size = 0; frees nothing */
+int Buf_Create(CBuf *p, size_t size, ISzAllocPtr alloc); /* allocates size bytes; returns 1 on success, 0 on allocation failure */
+void Buf_Free(CBuf *p, ISzAllocPtr alloc); /* frees data with alloc and resets *p to empty */
+
+typedef struct
+{
+  Byte *data;
+  size_t size;
+  size_t pos;   /* NOTE(review): presumably the current write position within data; the implementation is not in this header - confirm */
+} CDynBuf;
+
+void DynBuf_Construct(CDynBuf *p);
+void DynBuf_SeekToBeg(CDynBuf *p);
+int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAllocPtr alloc); /* NOTE(review): return convention presumably matches Buf_Create (non-zero on success) - confirm */
+void DynBuf_Free(CDynBuf *p, ISzAllocPtr alloc);
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/C/7zCrc.c b/libraries/lzma/C/7zCrc.c
new file mode 100644
index 000000000..b4d84f023
--- /dev/null
+++ b/libraries/lzma/C/7zCrc.c
@@ -0,0 +1,128 @@
+/* 7zCrc.c -- CRC32 init
+2017-06-06 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "7zCrc.h"
+#include "CpuArch.h"
+
+#define kCrcPoly 0xEDB88320
+
+#ifdef MY_CPU_LE
+  #define CRC_NUM_TABLES 8
+#else
+  #define CRC_NUM_TABLES 9
+
+  #define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24))
+
+  UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
+  UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
+#endif
+
+#ifndef MY_CPU_BE
+  UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
+  UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
+#endif
+
+typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table);
+/* Kernels chosen by CrcGenerateTable(); g_CrcUpdate is the one CrcUpdate and CrcCalc call. */
+CRC_FUNC g_CrcUpdateT4;
+CRC_FUNC g_CrcUpdateT8;
+CRC_FUNC g_CrcUpdate;
+/* CRC_NUM_TABLES consecutive 256-entry lookup tables, filled by CrcGenerateTable(). */
+UInt32 g_CrcTable[256 * CRC_NUM_TABLES];
+
+UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void *data, size_t size)
+{ /* Continue the running CRC state v over `size` bytes with the selected kernel. */
+  return (*g_CrcUpdate)(v, data, size, g_CrcTable);
+}
+
+UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size)
+{ /* One-shot CRC of a buffer: start from CRC_INIT_VAL, then apply the final XOR. */
+  return CRC_GET_DIGEST(CrcUpdate(CRC_INIT_VAL, data, size));
+}
+
+#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+
+/* Byte-at-a-time CRC update: folds each input byte into v through `table`. */
+UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{
+  const Byte *cur = (const Byte *)data;
+  for (; size != 0; size--, cur++)
+    v = CRC_UPDATE_BYTE_2(v, *cur);
+  return v;
+}
+
+/* Fills g_CrcTable and selects the update kernels (g_CrcUpdate, g_CrcUpdateT4,
+   g_CrcUpdateT8) that match the build's byte order.  Must be called once
+   before any other CRC function is used. */
+void MY_FAST_CALL CrcGenerateTable(void)
+{
+  UInt32 i;
+  for (i = 0; i < 256; i++) /* table 0: one entry per byte value, 8 shift/xor steps each */
+  {
+    UInt32 r = i;
+    unsigned j;
+    for (j = 0; j < 8; j++)
+      r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
+    g_CrcTable[i] = r;
+  }
+  for (i = 256; i < 256 * CRC_NUM_TABLES; i++) /* each further table: previous table advanced by one byte step */
+  {
+    UInt32 r = g_CrcTable[(size_t)i - 256];
+    g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8);
+  }
+
+  #if CRC_NUM_TABLES < 4
+  g_CrcUpdate = CrcUpdateT1;
+  #else
+  #ifdef MY_CPU_LE
+
+    g_CrcUpdateT4 = CrcUpdateT4;
+    g_CrcUpdate = CrcUpdateT4;
+
+    #if CRC_NUM_TABLES >= 8
+      g_CrcUpdateT8 = CrcUpdateT8;
+      /* The 8-byte kernel becomes the default, except on in-order x86 cores. */
+      #ifdef MY_CPU_X86_OR_AMD64
+      if (!CPU_Is_InOrder())
+      #endif
+        g_CrcUpdate = CrcUpdateT8;
+    #endif
+
+  #else
+  {
+    #ifndef MY_CPU_BE
+    UInt32 k = 0x01020304; /* byte order unknown at compile time: probe it at run time */
+    const Byte *p = (const Byte *)&k;
+    if (p[0] == 4 && p[1] == 3)
+    {
+      g_CrcUpdateT4 = CrcUpdateT4;
+      g_CrcUpdate = CrcUpdateT4;
+      #if CRC_NUM_TABLES >= 8
+      g_CrcUpdateT8 = CrcUpdateT8;
+      g_CrcUpdate = CrcUpdateT8;
+      #endif
+    }
+    else if (p[0] != 1 || p[1] != 2) /* neither little- nor big-endian layout: use the byte-wise kernel */
+      g_CrcUpdate = CrcUpdateT1;
+    else
+    #endif
+    {
+      /* Move every table up one slot, byte-swapped; the Be kernels start at table + 0x100. */
+      for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--)
+      {
+        UInt32 x = g_CrcTable[(size_t)i - 256];
+        g_CrcTable[i] = CRC_UINT32_SWAP(x);
+      }
+      g_CrcUpdateT4 = CrcUpdateT1_BeT4;
+      g_CrcUpdate = CrcUpdateT1_BeT4;
+      #if CRC_NUM_TABLES >= 8
+      g_CrcUpdateT8 = CrcUpdateT1_BeT8;
+      g_CrcUpdate = CrcUpdateT1_BeT8;
+      #endif
+    }
+  }
+  #endif
+  #endif
+}
diff --git a/libraries/lzma/C/7zCrc.h b/libraries/lzma/C/7zCrc.h
new file mode 100644
index 000000000..8fd579587
--- /dev/null
+++ b/libraries/lzma/C/7zCrc.h
@@ -0,0 +1,25 @@
+/* 7zCrc.h -- CRC32 calculation
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_CRC_H
+#define __7Z_CRC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+extern UInt32 g_CrcTable[];
+
+/* Call CrcGenerateTable one time before other CRC functions */
+void MY_FAST_CALL CrcGenerateTable(void);
+/* A CRC state starts at CRC_INIT_VAL; CRC_GET_DIGEST applies the final XOR. */
+#define CRC_INIT_VAL 0xFFFFFFFF
+#define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL)
+#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+/* CrcUpdate continues a running state; CrcCalc returns the finished digest of one buffer. */
+UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void *data, size_t size);
+UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size);
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/C/7zCrcOpt.c b/libraries/lzma/C/7zCrcOpt.c
new file mode 100644
index 000000000..73beba298
--- /dev/null
+++ b/libraries/lzma/C/7zCrcOpt.c
@@ -0,0 +1,115 @@
+/* 7zCrcOpt.c -- CRC32 calculation
+2017-04-03 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+
+#ifndef MY_CPU_BE
+
+#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+
+UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{ /* CRC update taking 4 bytes per step; indexes table[0 .. 0x3FF], i.e. four 256-entry tables */
+  const Byte *p = (const Byte *)data;
+  for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) /* byte-wise until p is 4-byte aligned */
+    v = CRC_UPDATE_BYTE_2(v, *p);
+  for (; size >= 4; size -= 4, p += 4) /* one aligned 32-bit load per iteration */
+  {
+    v ^= *(const UInt32 *)p;
+    v =
+          (table + 0x300)[((v      ) & 0xFF)]
+        ^ (table + 0x200)[((v >>  8) & 0xFF)]
+        ^ (table + 0x100)[((v >> 16) & 0xFF)]
+        ^ (table + 0x000)[((v >> 24))];
+  }
+  for (; size > 0; size--, p++) /* remaining tail bytes */
+    v = CRC_UPDATE_BYTE_2(v, *p);
+  return v;
+}
+
+UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{ /* CRC update taking 8 bytes per step; indexes table[0 .. 0x7FF], i.e. eight 256-entry tables */
+  const Byte *p = (const Byte *)data;
+  for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++) /* byte-wise until p is 8-byte aligned */
+    v = CRC_UPDATE_BYTE_2(v, *p);
+  for (; size >= 8; size -= 8, p += 8)
+  {
+    UInt32 d;
+    v ^= *(const UInt32 *)p; /* first 32-bit word is combined with the running state */
+    v =
+          (table + 0x700)[((v      ) & 0xFF)]
+        ^ (table + 0x600)[((v >>  8) & 0xFF)]
+        ^ (table + 0x500)[((v >> 16) & 0xFF)]
+        ^ (table + 0x400)[((v >> 24))];
+    d = *((const UInt32 *)p + 1); /* second 32-bit word goes through tables 3..0 */
+    v ^=
+          (table + 0x300)[((d      ) & 0xFF)]
+        ^ (table + 0x200)[((d >>  8) & 0xFF)]
+        ^ (table + 0x100)[((d >> 16) & 0xFF)]
+        ^ (table + 0x000)[((d >> 24))];
+  }
+  for (; size > 0; size--, p++) /* remaining tail bytes */
+    v = CRC_UPDATE_BYTE_2(v, *p);
+  return v;
+}
+
+#endif
+
+
+#ifndef MY_CPU_LE
+
+#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24))
+
+#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[(((crc) >> 24) ^ (b))] ^ ((crc) << 8))
+
+UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{ /* 4-bytes-per-step CRC update working on a byte-swapped state */
+  const Byte *p = (const Byte *)data;
+  table += 0x100; /* skip the first 256 entries; the tables used below start one table in */
+  v = CRC_UINT32_SWAP(v); /* state is kept byte-swapped inside this function */
+  for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) /* byte-wise until p is 4-byte aligned */
+    v = CRC_UPDATE_BYTE_2_BE(v, *p);
+  for (; size >= 4; size -= 4, p += 4)
+  {
+    v ^= *(const UInt32 *)p;
+    v =
+          (table + 0x000)[((v      ) & 0xFF)]
+        ^ (table + 0x100)[((v >>  8) & 0xFF)]
+        ^ (table + 0x200)[((v >> 16) & 0xFF)]
+        ^ (table + 0x300)[((v >> 24))];
+  }
+  for (; size > 0; size--, p++) /* remaining tail bytes */
+    v = CRC_UPDATE_BYTE_2_BE(v, *p);
+  return CRC_UINT32_SWAP(v); /* swap back to the caller's representation */
+}
+
+UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{ /* 8-bytes-per-step CRC update working on a byte-swapped state */
+  const Byte *p = (const Byte *)data;
+  table += 0x100; /* skip the first 256 entries; the tables used below start one table in */
+  v = CRC_UINT32_SWAP(v); /* state is kept byte-swapped inside this function */
+  for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++) /* byte-wise until p is 8-byte aligned */
+    v = CRC_UPDATE_BYTE_2_BE(v, *p);
+  for (; size >= 8; size -= 8, p += 8)
+  {
+    UInt32 d;
+    v ^= *(const UInt32 *)p; /* first 32-bit word is combined with the running state */
+    v =
+          (table + 0x400)[((v      ) & 0xFF)]
+        ^ (table + 0x500)[((v >>  8) & 0xFF)]
+        ^ (table + 0x600)[((v >> 16) & 0xFF)]
+        ^ (table + 0x700)[((v >> 24))];
+    d = *((const UInt32 *)p + 1); /* second 32-bit word */
+    v ^=
+          (table + 0x000)[((d      ) & 0xFF)]
+        ^ (table + 0x100)[((d >>  8) & 0xFF)]
+        ^ (table + 0x200)[((d >> 16) & 0xFF)]
+        ^ (table + 0x300)[((d >> 24))];
+  }
+  for (; size > 0; size--, p++) /* remaining tail bytes */
+    v = CRC_UPDATE_BYTE_2_BE(v, *p);
+  return CRC_UINT32_SWAP(v); /* swap back to the caller's representation */
+}
+#endif
diff --git a/libraries/lzma/C/7zDec.c b/libraries/lzma/C/7zDec.c
new file mode 100644
index 000000000..7c4635211
--- /dev/null
+++ b/libraries/lzma/C/7zDec.c
@@ -0,0 +1,591 @@
+/* 7zDec.c -- Decoding from 7z folder
+2019-02-02 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+/* #define _7ZIP_PPMD_SUPPPORT */
+
+#include "7z.h"
+#include "7zCrc.h"
+
+#include "Bcj2.h"
+#include "Bra.h"
+#include "CpuArch.h"
+#include "Delta.h"
+#include "LzmaDec.h"
+#include "Lzma2Dec.h"
+#ifdef _7ZIP_PPMD_SUPPPORT
+#include "Ppmd7.h"
+#endif
+/* Method IDs that the decoder below compares against the MethodID field of CSzCoderInfo. */
+#define k_Copy 0
+#define k_Delta 3
+#define k_LZMA2 0x21
+#define k_LZMA  0x30101
+#define k_BCJ   0x3030103
+#define k_BCJ2  0x303011B
+#define k_PPC   0x3030205
+#define k_IA64  0x3030401
+#define k_ARM   0x3030501
+#define k_ARMT  0x3030701
+#define k_SPARC 0x3030805
+
+
+#ifdef _7ZIP_PPMD_SUPPPORT
+
+#define k_PPMD 0x30401
+
+typedef struct
+{
+  IByteIn vt; /* byte-reader interface; ReadByte recovers the enclosing struct from it */
+  const Byte *cur; /* next unread byte of the current look window */
+  const Byte *end; /* one past the last byte of the current look window */
+  const Byte *begin; /* start of the window as returned by ILookInStream_Look */
+  UInt64 processed; /* number of bytes of earlier windows that were already consumed */
+  BoolInt extra; /* set once ReadByte was asked for a byte it could not supply */
+  SRes res; /* result of the most recent stream call made by ReadByte */
+  const ILookInStream *inStream; /* stream the windows are taken from */
+} CByteInToLook;
+/* IByteIn callback used by the PPMd decoder: returns the next input byte, refilling the window with Skip + Look once it is drained. When no byte can be supplied it sets p->extra and returns 0; the stream status is left in p->res. */
+static Byte ReadByte(const IByteIn *pp)
+{
+  CByteInToLook *p = CONTAINER_FROM_VTBL(pp, CByteInToLook, vt);
+  if (p->cur != p->end)
+    return *p->cur++;
+  if (p->res == SZ_OK)
+  {
+    size_t size = (size_t)(p->cur - p->begin);
+    p->processed += size;
+    p->res = ILookInStream_Skip(p->inStream, size);
+    size = (p->res == SZ_OK) ? ((size_t)1 << 25) : 0; /* no Look after a failed Skip, so the Skip error stays in p->res and the window ends up empty */
+    if (size != 0)
+      p->res = ILookInStream_Look(p->inStream, (const void **)&p->begin, &size);
+    p->cur = p->begin;
+    p->end = p->begin + size;
+    if (size != 0) return *p->cur++;
+  }
+  p->extra = True;
+  return 0;
+}
+/* Decodes a PPMd7 stream of inSize packed bytes from inStream into outBuffer[0..outSize). props must be exactly 5 bytes: the order followed by a 32-bit memory size. Returns SZ_OK, SZ_ERROR_UNSUPPORTED, SZ_ERROR_MEM, SZ_ERROR_DATA, or the stream error recorded by ReadByte. */
+static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, const ILookInStream *inStream,
+    Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
+{
+  CPpmd7 ppmd;
+  CByteInToLook s;
+  SRes res = SZ_OK;
+  /* Start with an empty window (cur == end) so the first ReadByte call fetches data from inStream. */
+  s.vt.Read = ReadByte;
+  s.inStream = inStream;
+  s.begin = s.end = s.cur = NULL;
+  s.extra = False;
+  s.res = SZ_OK;
+  s.processed = 0;
+  /* Properties: props[0] is the model order, props[1..4] the memory size read with GetUi32. */
+  if (propsSize != 5)
+    return SZ_ERROR_UNSUPPORTED;
+
+  {
+    unsigned order = props[0];
+    UInt32 memSize = GetUi32(props + 1);
+    if (order < PPMD7_MIN_ORDER ||
+        order > PPMD7_MAX_ORDER ||
+        memSize < PPMD7_MIN_MEM_SIZE ||
+        memSize > PPMD7_MAX_MEM_SIZE)
+      return SZ_ERROR_UNSUPPORTED;
+    Ppmd7_Construct(&ppmd);
+    if (!Ppmd7_Alloc(&ppmd, memSize, allocMain))
+      return SZ_ERROR_MEM;
+    Ppmd7_Init(&ppmd, order);
+  }
+  {
+    CPpmd7z_RangeDec rc;
+    Ppmd7z_RangeDec_CreateVTable(&rc);
+    rc.Stream = &s.vt;
+    if (!Ppmd7z_RangeDec_Init(&rc))
+      res = SZ_ERROR_DATA;
+    else if (s.extra) /* the range decoder initialisation already ran out of input */
+      res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
+    else
+    {
+      SizeT i;
+      for (i = 0; i < outSize; i++)
+      {
+        int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.vt);
+        if (s.extra || sym < 0)
+          break;
+        outBuffer[i] = (Byte)sym;
+      }
+      if (i != outSize) /* stopped early: input ran out (s.extra) or the decoder returned a negative symbol */
+        res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
+      else if (s.processed + (s.cur - s.begin) != inSize || !Ppmd7z_RangeDec_IsFinishedOK(&rc)) /* all output produced: exactly inSize bytes must have been consumed */
+        res = SZ_ERROR_DATA;
+    }
+  }
+  Ppmd7_Free(&ppmd, allocMain);
+  return res;
+}
+
+#endif
+
+/* Decodes one LZMA stream from inStream into outBuffer. Succeeds only when exactly outSize bytes are produced and exactly inSize packed bytes are consumed; otherwise returns SZ_ERROR_DATA or the error reported by the decoder or the stream. */
+static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream,
+    Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
+{
+  CLzmaDec state;
+  SRes res = SZ_OK;
+  /* The decoder writes straight into outBuffer, which is installed as its dictionary of outSize bytes. */
+  LzmaDec_Construct(&state);
+  RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain));
+  state.dic = outBuffer;
+  state.dicBufSize = outSize;
+  LzmaDec_Init(&state);
+  /* Each iteration offers the decoder at most 1 << 18 bytes, and never more than what is left of inSize. */
+  for (;;)
+  {
+    const void *inBuf = NULL;
+    size_t lookahead = (1 << 18);
+    if (lookahead > inSize)
+      lookahead = (size_t)inSize;
+    res = ILookInStream_Look(inStream, &inBuf, &lookahead);
+    if (res != SZ_OK)
+      break;
+
+    {
+      SizeT inProcessed = (SizeT)lookahead, dicPos = state.dicPos;
+      ELzmaStatus status;
+      res = LzmaDec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status);
+      lookahead -= inProcessed;
+      inSize -= inProcessed;
+      if (res != SZ_OK)
+        break;
+      /* End marker seen: acceptable only if all output was produced and all input consumed. */
+      if (status == LZMA_STATUS_FINISHED_WITH_MARK)
+      {
+        if (outSize != state.dicPos || inSize != 0)
+          res = SZ_ERROR_DATA;
+        break;
+      }
+      /* No end marker, but the output is full, the input is used up and the decoder reports a possible clean end. */
+      if (outSize == state.dicPos && inSize == 0 && status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)
+        break;
+      /* Neither input consumed nor output produced in this round: the stream cannot make progress. */
+      if (inProcessed == 0 && dicPos == state.dicPos)
+      {
+        res = SZ_ERROR_DATA;
+        break;
+      }
+      /* Advance the stream past the bytes the decoder actually consumed. */
+      res = ILookInStream_Skip(inStream, inProcessed);
+      if (res != SZ_OK)
+        break;
+    }
+  }
+
+  LzmaDec_FreeProbs(&state, allocMain);
+  return res;
+}
+
+
+#ifndef _7Z_NO_METHOD_LZMA2
+/* Decodes one LZMA2 stream from inStream into outBuffer. props must be a single byte. Succeeds only when the decoder reports LZMA_STATUS_FINISHED_WITH_MARK with exactly outSize bytes produced and inSize bytes consumed. */
+static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream,
+    Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
+{
+  CLzma2Dec state;
+  SRes res = SZ_OK;
+
+  Lzma2Dec_Construct(&state);
+  if (propsSize != 1)
+    return SZ_ERROR_DATA;
+  RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain));
+  state.decoder.dic = outBuffer;
+  state.decoder.dicBufSize = outSize;
+  Lzma2Dec_Init(&state);
+  /* Each iteration offers the decoder at most 1 << 18 bytes, and never more than what is left of inSize. */
+  for (;;)
+  {
+    const void *inBuf = NULL;
+    size_t lookahead = (1 << 18);
+    if (lookahead > inSize)
+      lookahead = (size_t)inSize;
+    res = ILookInStream_Look(inStream, &inBuf, &lookahead);
+    if (res != SZ_OK)
+      break;
+
+    {
+      SizeT inProcessed = (SizeT)lookahead, dicPos = state.decoder.dicPos;
+      ELzmaStatus status;
+      res = Lzma2Dec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status);
+      lookahead -= inProcessed;
+      inSize -= inProcessed;
+      if (res != SZ_OK)
+        break;
+      /* This is the only successful exit: the end marker was seen, output is full and input is used up. */
+      if (status == LZMA_STATUS_FINISHED_WITH_MARK)
+      {
+        if (outSize != state.decoder.dicPos || inSize != 0)
+          res = SZ_ERROR_DATA;
+        break;
+      }
+      /* Neither input consumed nor output produced in this round: the stream cannot make progress. */
+      if (inProcessed == 0 && dicPos == state.decoder.dicPos)
+      {
+        res = SZ_ERROR_DATA;
+        break;
+      }
+      /* Advance the stream past the bytes the decoder actually consumed. */
+      res = ILookInStream_Skip(inStream, inProcessed);
+      if (res != SZ_OK)
+        break;
+    }
+  }
+
+  Lzma2Dec_FreeProbs(&state, allocMain);
+  return res;
+}
+
+#endif
+
+
+static SRes SzDecodeCopy(UInt64 inSize, ILookInStream *inStream, Byte *outBuffer)
+{
+  while (inSize > 0)
+  {
+    const void *inBuf;
+    size_t curSize = (1 << 18);
+    if (curSize > inSize)
+      curSize = (size_t)inSize;
+    RINOK(ILookInStream_Look(inStream, &inBuf, &curSize));
+    if (curSize == 0)
+      return SZ_ERROR_INPUT_EOF;
+    memcpy(outBuffer, inBuf, curSize);
+    outBuffer += curSize;
+    inSize -= curSize;
+    RINOK(ILookInStream_Skip(inStream, curSize));
+  }
+  return SZ_OK;
+}
+
+/* Tells whether m is a method ID that this build decodes from a pack stream: */
+/* Copy and LZMA always, LZMA2 and PPMd depending on the build macros checked below. */
+static BoolInt IS_MAIN_METHOD(UInt32 m)
+{
+  if (m == k_Copy || m == k_LZMA)
+    return True;
+  #ifndef _7Z_NO_METHOD_LZMA2
+  if (m == k_LZMA2)
+    return True;
+  #endif
+  #ifdef _7ZIP_PPMD_SUPPPORT
+  if (m == k_PPMD)
+    return True;
+  #endif
+  return False;
+}
+/* A coder is supported when it has exactly one stream and its method ID (truncated to UInt32) is a main method. */
+static BoolInt IS_SUPPORTED_CODER(const CSzCoderInfo *c)
+{
+  /* && c->MethodID <= (UInt32)0xFFFFFFFF */
+  if (c->NumStreams != 1)
+    return False;
+  return IS_MAIN_METHOD((UInt32)c->MethodID);
+}
+
+#define IS_BCJ2(c) ((c)->MethodID == k_BCJ2 && (c)->NumStreams == 4)
+/* Returns SZ_OK only for the folder layouts SzFolder_Decode2 handles: one main coder; a main coder plus one filter (unless _7Z_NO_METHODS_FILTERS is defined); or the 4-coder BCJ2 arrangement. Every other layout gives SZ_ERROR_UNSUPPORTED. */
+static SRes CheckSupportedFolder(const CSzFolder *f)
+{
+  if (f->NumCoders < 1 || f->NumCoders > 4)
+    return SZ_ERROR_UNSUPPORTED;
+  if (!IS_SUPPORTED_CODER(&f->Coders[0]))
+    return SZ_ERROR_UNSUPPORTED;
+  if (f->NumCoders == 1) /* single coder: exactly one pack stream (index 0) and no bonds */
+  {
+    if (f->NumPackStreams != 1 || f->PackStreams[0] != 0 || f->NumBonds != 0)
+      return SZ_ERROR_UNSUPPORTED;
+    return SZ_OK;
+  }
+  
+  
+  #ifndef _7Z_NO_METHODS_FILTERS
+  /* Two coders: Coders[1] must be a single-stream filter joined to Coders[0] by one bond (InIndex 1, OutIndex 0). */
+  if (f->NumCoders == 2)
+  {
+    const CSzCoderInfo *c = &f->Coders[1];
+    if (
+        /* c->MethodID > (UInt32)0xFFFFFFFF || */
+        c->NumStreams != 1
+        || f->NumPackStreams != 1
+        || f->PackStreams[0] != 0
+        || f->NumBonds != 1
+        || f->Bonds[0].InIndex != 1
+        || f->Bonds[0].OutIndex != 0)
+      return SZ_ERROR_UNSUPPORTED;
+    switch ((UInt32)c->MethodID)
+    {
+      case k_Delta:
+      case k_BCJ:
+      case k_PPC:
+      case k_IA64:
+      case k_SPARC:
+      case k_ARM:
+      case k_ARMT:
+        break;
+      default:
+        return SZ_ERROR_UNSUPPORTED;
+    }
+    return SZ_OK;
+  }
+
+  #endif
+
+  /* Four coders: Coders[0..2] are main methods and Coders[3] is BCJ2, with one fixed pack-stream order and bond wiring. */
+  if (f->NumCoders == 4)
+  {
+    if (!IS_SUPPORTED_CODER(&f->Coders[1])
+        || !IS_SUPPORTED_CODER(&f->Coders[2])
+        || !IS_BCJ2(&f->Coders[3]))
+      return SZ_ERROR_UNSUPPORTED;
+    if (f->NumPackStreams != 4
+        || f->PackStreams[0] != 2
+        || f->PackStreams[1] != 6
+        || f->PackStreams[2] != 1
+        || f->PackStreams[3] != 0
+        || f->NumBonds != 3
+        || f->Bonds[0].InIndex != 5 || f->Bonds[0].OutIndex != 0
+        || f->Bonds[1].InIndex != 4 || f->Bonds[1].OutIndex != 1
+        || f->Bonds[2].InIndex != 3 || f->Bonds[2].OutIndex != 2)
+      return SZ_ERROR_UNSUPPORTED;
+    return SZ_OK;
+  }
+  
+  return SZ_ERROR_UNSUPPORTED;
+}
+/* Expands to a switch case that runs <isa>_Convert over the whole output buffer (both trailing arguments 0) and then breaks. */
+#define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break;
+/* Decodes one folder into outBuffer[0..outSize) by running its coders in index order. Heap buffers allocated here are stored in tempBuf[] and are NOT freed by this function, on success or on error; the caller releases them. */
+static SRes SzFolder_Decode2(const CSzFolder *folder,
+    const Byte *propsData,
+    const UInt64 *unpackSizes,
+    const UInt64 *packPositions,
+    ILookInStream *inStream, UInt64 startPos,
+    Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain,
+    Byte *tempBuf[])
+{
+  UInt32 ci;
+  SizeT tempSizes[3] = { 0, 0, 0};
+  SizeT tempSize3 = 0;
+  Byte *tempBuf3 = 0;
+  /* tempBuf3 / tempSize3 describe the stream that coder 2 of the BCJ2 layout decodes into the tail of outBuffer. */
+  RINOK(CheckSupportedFolder(folder));
+
+  for (ci = 0; ci < folder->NumCoders; ci++)
+  {
+    const CSzCoderInfo *coder = &folder->Coders[ci];
+
+    if (IS_MAIN_METHOD((UInt32)coder->MethodID))
+    {
+      UInt32 si = 0;
+      UInt64 offset;
+      UInt64 inSize;
+      Byte *outBufCur = outBuffer;
+      SizeT outSizeCur = outSize;
+      if (folder->NumCoders == 4) /* BCJ2 layout: coders 0, 1, 2 read pack streams 3, 2, 0 */
+      {
+        UInt32 indices[] = { 3, 2, 0 };
+        UInt64 unpackSize = unpackSizes[ci];
+        si = indices[ci];
+        if (ci < 2) /* coders 0 and 1 decode into heap buffers kept in tempBuf[1] and tempBuf[0] */
+        {
+          Byte *temp;
+          outSizeCur = (SizeT)unpackSize;
+          if (outSizeCur != unpackSize)
+            return SZ_ERROR_MEM;
+          temp = (Byte *)ISzAlloc_Alloc(allocMain, outSizeCur);
+          if (!temp && outSizeCur != 0)
+            return SZ_ERROR_MEM;
+          outBufCur = tempBuf[1 - ci] = temp;
+          tempSizes[1 - ci] = outSizeCur;
+        }
+        else if (ci == 2) /* coder 2 decodes into the last unpackSize bytes of outBuffer */
+        {
+          if (unpackSize > outSize) /* check it */
+            return SZ_ERROR_PARAM;
+          tempBuf3 = outBufCur = outBuffer + (outSize - (size_t)unpackSize);
+          tempSize3 = outSizeCur = (SizeT)unpackSize;
+        }
+        else
+          return SZ_ERROR_UNSUPPORTED;
+      }
+      offset = packPositions[si];
+      inSize = packPositions[(size_t)si + 1] - offset;
+      RINOK(LookInStream_SeekTo(inStream, startPos + offset));
+      /* Dispatch on the method; each decoder is given inSize packed bytes and an output area of outSizeCur bytes. */
+      if (coder->MethodID == k_Copy)
+      {
+        if (inSize != outSizeCur) /* check it */
+          return SZ_ERROR_DATA;
+        RINOK(SzDecodeCopy(inSize, inStream, outBufCur));
+      }
+      else if (coder->MethodID == k_LZMA)
+      {
+        RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
+      }
+      #ifndef _7Z_NO_METHOD_LZMA2
+      else if (coder->MethodID == k_LZMA2)
+      {
+        RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
+      }
+      #endif
+      #ifdef _7ZIP_PPMD_SUPPPORT
+      else if (coder->MethodID == k_PPMD)
+      {
+        RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
+      }
+      #endif
+      else
+        return SZ_ERROR_UNSUPPORTED;
+    }
+    else if (coder->MethodID == k_BCJ2)
+    {
+      UInt64 offset = packPositions[1];
+      UInt64 s3Size = packPositions[2] - offset;
+      /* BCJ2 must be the 4th coder; pack stream 1 is copied verbatim into tempBuf[2] as its last input. */
+      if (ci != 3)
+        return SZ_ERROR_UNSUPPORTED;
+      
+      tempSizes[2] = (SizeT)s3Size;
+      if (tempSizes[2] != s3Size)
+        return SZ_ERROR_MEM;
+      tempBuf[2] = (Byte *)ISzAlloc_Alloc(allocMain, tempSizes[2]);
+      if (!tempBuf[2] && tempSizes[2] != 0)
+        return SZ_ERROR_MEM;
+      
+      RINOK(LookInStream_SeekTo(inStream, startPos + offset));
+      RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2]));
+      /* tempSizes[0] and tempSizes[1] must be multiples of 4, and the three decoded streams must add up to outSize. */
+      if ((tempSizes[0] & 3) != 0 ||
+          (tempSizes[1] & 3) != 0 ||
+          tempSize3 + tempSizes[0] + tempSizes[1] != outSize)
+        return SZ_ERROR_DATA;
+
+      {
+        CBcj2Dec p;
+        /* Inputs: the tail of outBuffer plus the three temp buffers; output: outBuffer from its start. */
+        p.bufs[0] = tempBuf3;   p.lims[0] = tempBuf3 + tempSize3;
+        p.bufs[1] = tempBuf[0]; p.lims[1] = tempBuf[0] + tempSizes[0];
+        p.bufs[2] = tempBuf[1]; p.lims[2] = tempBuf[1] + tempSizes[1];
+        p.bufs[3] = tempBuf[2]; p.lims[3] = tempBuf[2] + tempSizes[2];
+        
+        p.dest = outBuffer;
+        p.destLim = outBuffer + outSize;
+        
+        Bcj2Dec_Init(&p);
+        RINOK(Bcj2Dec_Decode(&p));
+        /* Success requires every input stream fully consumed, the decoder finished and the output exactly full. */
+        {
+          unsigned i;
+          for (i = 0; i < 4; i++)
+            if (p.bufs[i] != p.lims[i])
+              return SZ_ERROR_DATA;
+          
+          if (!Bcj2Dec_IsFinished(&p))
+            return SZ_ERROR_DATA;
+
+          if (p.dest != p.destLim
+             || p.state != BCJ2_STREAM_MAIN)
+            return SZ_ERROR_DATA;
+        }
+      }
+    }
+    #ifndef _7Z_NO_METHODS_FILTERS
+    else if (ci == 1) /* filter coder of the 2-coder layout: only outBuffer is passed to the filter */
+    {
+      if (coder->MethodID == k_Delta)
+      {
+        if (coder->PropsSize != 1)
+          return SZ_ERROR_UNSUPPORTED;
+        {
+          Byte state[DELTA_STATE_SIZE];
+          Delta_Init(state);
+          Delta_Decode(state, (unsigned)(propsData[coder->PropsOffset]) + 1, outBuffer, outSize);
+        }
+      }
+      else
+      {
+        if (coder->PropsSize != 0)
+          return SZ_ERROR_UNSUPPORTED;
+        switch (coder->MethodID)
+        {
+          case k_BCJ:
+          {
+            UInt32 state;
+            x86_Convert_Init(state);
+            x86_Convert(outBuffer, outSize, 0, &state, 0);
+            break;
+          }
+          CASE_BRA_CONV(PPC)
+          CASE_BRA_CONV(IA64)
+          CASE_BRA_CONV(SPARC)
+          CASE_BRA_CONV(ARM)
+          CASE_BRA_CONV(ARMT)
+          default:
+            return SZ_ERROR_UNSUPPORTED;
+        }
+      }
+    }
+    #endif
+    else
+      return SZ_ERROR_UNSUPPORTED;
+  }
+
+  return SZ_OK;
+}
+
+/* Decodes folder folderIndex of archive p into outBuffer. outSize must equal the folder's unpack size. Returns SZ_ERROR_FAIL when the folder record is inconsistent, SZ_ERROR_CRC when a stored folder CRC does not match, otherwise the result of the decode. */
+SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
+    ILookInStream *inStream, UInt64 startPos,
+    Byte *outBuffer, size_t outSize,
+    ISzAllocPtr allocMain)
+{
+  SRes res;
+  CSzFolder folder;
+  CSzData sd;
+  
+  const Byte *data = p->CodersData + p->FoCodersOffsets[folderIndex];
+  sd.Data = data;
+  sd.Size = p->FoCodersOffsets[(size_t)folderIndex + 1] - p->FoCodersOffsets[folderIndex];
+  
+  res = SzGetNextFolderItem(&folder, &sd);
+  
+  if (res != SZ_OK)
+    return res;
+  /* The folder record must be parsed completely and agree with the archive tables and with the caller's buffer size. */
+  if (sd.Size != 0
+      || folder.UnpackStream != p->FoToMainUnpackSizeIndex[folderIndex]
+      || outSize != SzAr_GetFolderUnpackSize(p, folderIndex))
+    return SZ_ERROR_FAIL;
+  {
+    unsigned i;
+    Byte *tempBuf[3] = { 0, 0, 0};
+
+    res = SzFolder_Decode2(&folder, data,
+        &p->CoderUnpackSizes[p->FoToCoderUnpackSizes[folderIndex]],
+        p->PackPositions + p->FoStartPackStreamIndex[folderIndex],
+        inStream, startPos,
+        outBuffer, (SizeT)outSize, allocMain, tempBuf);
+    /* Temp buffers are released here whether or not decoding succeeded; unused slots are still 0. */
+    for (i = 0; i < 3; i++)
+      ISzAlloc_Free(allocMain, tempBuf[i]);
+    /* The CRC is verified only after a successful decode and only if one is recorded for this folder. */
+    if (res == SZ_OK)
+      if (SzBitWithVals_Check(&p->FolderCRCs, folderIndex))
+        if (CrcCalc(outBuffer, outSize) != p->FolderCRCs.Vals[folderIndex])
+          res = SZ_ERROR_CRC;
+
+    return res;
+  }
+}
diff --git a/libraries/lzma/C/7zStream.c b/libraries/lzma/C/7zStream.c
new file mode 100644
index 000000000..6b5aa1621
--- /dev/null
+++ b/libraries/lzma/C/7zStream.c
@@ -0,0 +1,176 @@
+/* 7zStream.c -- 7z Stream functions
+2017-04-03 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+#include "7zTypes.h"
+
+SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType)
+{
+  while (size != 0)
+  {
+    size_t processed = size;
+    RINOK(ISeqInStream_Read(stream, buf, &processed));
+    if (processed == 0)
+      return errorType;
+    buf = (void *)((Byte *)buf + processed);
+    size -= processed;
+  }
+  return SZ_OK;
+}
+/* Same as SeqInStream_Read2 with SZ_ERROR_INPUT_EOF as the error for an early end of stream. */
+SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size)
+{
+  return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
+}
+
+SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf)
+{
+  size_t processed = 1;
+  RINOK(ISeqInStream_Read(stream, buf, &processed));
+  return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF;
+}
+
+
+/* Seeks stream to the absolute position offset (SZ_SEEK_SET); the position written back by Seek is discarded. */
+SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset)
+{
+  Int64 pos = (Int64)offset;
+  return ILookInStream_Seek(stream, &pos, SZ_SEEK_SET);
+}
+/* Copies up to *size bytes into buf using Look followed by Skip; *size is updated to the number of bytes delivered. A request of 0 bytes returns SZ_OK without touching the stream. */
+SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size)
+{
+  const void *window;
+  SRes res = SZ_OK;
+  if (*size == 0 || (res = ILookInStream_Look(stream, &window, size)) != SZ_OK)
+    return res;
+  memcpy(buf, window, *size);
+  return ILookInStream_Skip(stream, *size);
+}
+
+SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType)
+{
+  while (size != 0)
+  {
+    size_t processed = size;
+    RINOK(ILookInStream_Read(stream, buf, &processed));
+    if (processed == 0)
+      return errorType;
+    buf = (void *)((Byte *)buf + processed);
+    size -= processed;
+  }
+  return SZ_OK;
+}
+/* Same as LookInStream_Read2 with SZ_ERROR_INPUT_EOF as the error for an early end of stream. */
+SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size)
+{
+  return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
+}
+
+
+/* Declares p as the CLookToRead2 that contains the ILookInStream named pp at the expansion site. */
+#define GET_LookToRead2 CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt);
+/* Look implementation that refills the whole buffer when it is empty. On return *buf points at the unread bytes and *size is reduced to the number available. */
+static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf, size_t *size)
+{
+  CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt);
+  SRes status = SZ_OK;
+  size_t avail = p->size - p->pos;
+  if (avail == 0 && *size != 0)
+  {
+    /* buffer drained: rewind and ask the real stream for as much as the buffer can hold */
+    avail = p->bufSize;
+    p->pos = p->size = 0;
+    status = ISeekInStream_Read(p->realStream, p->buf, &avail);
+    p->size = avail;
+  }
+  *buf = p->buf + p->pos;
+  if (avail < *size)
+    *size = avail;
+  return status;
+}
+/* Look implementation that, when the buffer is empty, reads only the requested amount (capped at bufSize). On return *buf points at the unread bytes and *size is the number available. */
+static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, size_t *size)
+{
+  CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt);
+  SRes status = SZ_OK;
+  size_t avail = p->size - p->pos;
+  if (avail == 0 && *size != 0)
+  {
+    /* buffer drained: rewind and read just what was asked for, limited to the buffer size */
+    p->pos = p->size = 0;
+    if (p->bufSize < *size)
+      *size = p->bufSize;
+    status = ISeekInStream_Read(p->realStream, p->buf, size);
+    avail = p->size = *size;
+  }
+  *buf = p->buf + p->pos;
+  if (avail < *size)
+    *size = avail;
+  return status;
+}
+/* Marks offset buffered bytes as consumed by advancing the read position; no bounds check is made here. */
+static SRes LookToRead2_Skip(const ILookInStream *pp, size_t offset)
+{
+  CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt);
+  p->pos += offset;
+  return SZ_OK;
+}
+/* Read implementation: serves the request from buffered bytes when there are any (possibly fewer than asked for), otherwise reads from the real stream directly into buf. */
+static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size)
+{
+  CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt);
+  const size_t buffered = p->size - p->pos;
+  const size_t n = (*size < buffered) ? *size : buffered;
+  if (buffered == 0)
+    return ISeekInStream_Read(p->realStream, buf, size);
+  /* hand out min(requested, buffered) bytes and consume them */
+  memcpy(buf, p->buf + p->pos, n);
+  p->pos += n;
+  *size = n;
+  return SZ_OK;
+}
+/* Discards all buffered data and forwards the seek to the real stream. */
+static SRes LookToRead2_Seek(const ILookInStream *pp, Int64 *pos, ESzSeek origin)
+{
+  CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt);
+  p->size = p->pos = 0;
+  return ISeekInStream_Seek(p->realStream, pos, origin);
+}
+/* Fills in the ILookInStream function table of p. */
+/* A non-zero lookahead selects the Look variant that refills the whole buffer; zero selects the variant that reads only what is requested. */
+void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead)
+{
+  p->vt.Seek = LookToRead2_Seek;
+  p->vt.Read = LookToRead2_Read;
+  p->vt.Skip = LookToRead2_Skip;
+  p->vt.Look = lookahead ? LookToRead2_Look_Lookahead : LookToRead2_Look_Exact;
+}
+
+
+
+/* ISeqInStream::Read adapter: serves the read through LookInStream_LookRead on the wrapped stream. */
+static SRes SecToLook_Read(const ISeqInStream *pp, void *buf, size_t *size)
+{
+  CSecToLook *self = CONTAINER_FROM_VTBL(pp, CSecToLook, vt);
+  return LookInStream_LookRead(self->realStream, buf, size);
+}
+/* Installs SecToLook_Read as the Read function of p; realStream is left for the caller to set. */
+void SecToLook_CreateVTable(CSecToLook *p)
+{
+  p->vt.Read = SecToLook_Read;
+}
+/* ISeqInStream::Read adapter: forwards the read to ILookInStream_Read of the wrapped stream. */
+static SRes SecToRead_Read(const ISeqInStream *pp, void *buf, size_t *size)
+{
+  CSecToRead *self = CONTAINER_FROM_VTBL(pp, CSecToRead, vt);
+  return ILookInStream_Read(self->realStream, buf, size);
+}
+/* Installs SecToRead_Read as the Read function of p; realStream is left for the caller to set. */
+void SecToRead_CreateVTable(CSecToRead *p)
+{
+  p->vt.Read = SecToRead_Read;
+}
diff --git a/libraries/lzma/C/7zTypes.h b/libraries/lzma/C/7zTypes.h
new file mode 100644
index 000000000..65b3af63c
--- /dev/null
+++ b/libraries/lzma/C/7zTypes.h
@@ -0,0 +1,375 @@
+/* 7zTypes.h -- Basic types
+2018-08-04 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_TYPES_H
+#define __7Z_TYPES_H
+
+#ifdef _WIN32
+/* #include <windows.h> */
+#endif
+
+#include <stddef.h>
+
+#ifndef EXTERN_C_BEGIN
+#ifdef __cplusplus
+#define EXTERN_C_BEGIN extern "C" {
+#define EXTERN_C_END }
+#else
+#define EXTERN_C_BEGIN
+#define EXTERN_C_END
+#endif
+#endif
+
+EXTERN_C_BEGIN
+/* SRes result codes. SZ_OK is 0, the value the RINOK macro treats as success. */
+#define SZ_OK 0
+
+#define SZ_ERROR_DATA 1
+#define SZ_ERROR_MEM 2
+#define SZ_ERROR_CRC 3
+#define SZ_ERROR_UNSUPPORTED 4
+#define SZ_ERROR_PARAM 5
+#define SZ_ERROR_INPUT_EOF 6
+#define SZ_ERROR_OUTPUT_EOF 7
+#define SZ_ERROR_READ 8
+#define SZ_ERROR_WRITE 9
+#define SZ_ERROR_PROGRESS 10
+#define SZ_ERROR_FAIL 11
+#define SZ_ERROR_THREAD 12
+
+#define SZ_ERROR_ARCHIVE 16
+#define SZ_ERROR_NO_ARCHIVE 17
+
+typedef int SRes;
+
+
+#ifdef _WIN32
+
+/* typedef DWORD WRes; */
+typedef unsigned WRes;
+#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
+
+#else
+
+typedef int WRes;
+#define MY__FACILITY_WIN32 7
+#define MY__FACILITY__WRes MY__FACILITY_WIN32
+#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000)))
+
+#endif
+
+/* RINOK(x): evaluates x once and makes the enclosing function return that value if it is non-zero. Expands to a brace block, not a single statement. */
+#ifndef RINOK
+#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
+#endif
+
+typedef unsigned char Byte;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#ifdef _LZMA_UINT32_IS_ULONG
+typedef long Int32;
+typedef unsigned long UInt32;
+#else
+typedef int Int32;
+typedef unsigned int UInt32;
+#endif
+
+#ifdef _SZ_NO_INT_64
+
+/* Define _SZ_NO_INT_64 if your compiler doesn't support 64-bit integers.
+   NOTE: Some code will work incorrectly in that case! */
+
+typedef long Int64;
+typedef unsigned long UInt64;
+
+#else
+
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+typedef __int64 Int64;
+typedef unsigned __int64 UInt64;
+#define UINT64_CONST(n) n
+#else
+typedef long long int Int64;
+typedef unsigned long long int UInt64;
+#define UINT64_CONST(n) n ## ULL
+#endif
+
+#endif
+
+#ifdef _LZMA_NO_SYSTEM_SIZE_T
+typedef UInt32 SizeT;
+#else
+typedef size_t SizeT;
+#endif
+
+typedef int BoolInt;
+/* typedef BoolInt Bool; */
+#define True 1
+#define False 0
+
+
+#ifdef _WIN32
+#define MY_STD_CALL __stdcall
+#else
+#define MY_STD_CALL
+#endif
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1300
+#define MY_NO_INLINE __declspec(noinline)
+#else
+#define MY_NO_INLINE
+#endif
+
+#define MY_FORCE_INLINE __forceinline
+
+#define MY_CDECL __cdecl
+#define MY_FAST_CALL __fastcall
+
+#else
+
+#define MY_NO_INLINE
+#define MY_FORCE_INLINE
+#define MY_CDECL
+#define MY_FAST_CALL
+
+/* inline keyword : for C++ / C99 */
+
+/* GCC, clang: */
+/*
+#if defined (__GNUC__) && (__GNUC__ >= 4)
+#define MY_FORCE_INLINE __attribute__((always_inline))
+#define MY_NO_INLINE __attribute__((noinline))
+#endif
+*/
+
+#endif
+
+
+/* The following interfaces take a pointer to the interface structure as their first parameter */
+/* Byte-at-a-time input. IByteIn_Read(p) expands to (p)->Read(p), so p is evaluated twice. */
+typedef struct IByteIn IByteIn;
+struct IByteIn
+{
+  Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
+};
+#define IByteIn_Read(p) (p)->Read(p)
+
+/* Byte-at-a-time output. Write returns nothing, so this interface itself reports no errors. */
+typedef struct IByteOut IByteOut;
+struct IByteOut
+{
+  void (*Write)(const IByteOut *p, Byte b);
+};
+#define IByteOut_Write(p, b) (p)->Write(p, b)
+
+/* Sequential input stream. Read takes the wanted byte count in *size and stores the delivered count back into *size. */
+typedef struct ISeqInStream ISeqInStream;
+struct ISeqInStream
+{
+  SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);
+    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+       (output(*size) < input(*size)) is allowed */
+};
+#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+
+/* SeqInStream_Read2 keeps reading until size bytes have arrived and returns errorType on an early end of stream; SeqInStream_Read does the same with SZ_ERROR_INPUT_EOF; SeqInStream_ReadByte issues a single 1-byte read and returns SZ_ERROR_INPUT_EOF if no byte was delivered. */
+SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);
+SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);
+SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);
+
+/* Sequential output stream; unlike the input interfaces, Write returns a byte count instead of an SRes. */
+typedef struct ISeqOutStream ISeqOutStream;
+struct ISeqOutStream
+{
+  size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);
+    /* Returns: result - the number of actually written bytes.
+       (result < size) means error */
+};
+#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)
+/* Origin argument for the Seek functions; the enumerators have the values 0, 1 and 2. */
+typedef enum
+{
+  SZ_SEEK_SET = 0,
+  SZ_SEEK_CUR = 1,
+  SZ_SEEK_END = 2
+} ESzSeek;
+
+/* Input stream that can be repositioned. The position is passed through *pos (presumably updated with the resulting position - confirm against the implementations). */
+typedef struct ISeekInStream ISeekInStream;
+struct ISeekInStream
+{
+  SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size);  /* same as ISeqInStream::Read */
+  SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
+};
+#define ISeekInStream_Read(p, buf, size)   (p)->Read(p, buf, size)
+#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+/* Input stream with a look window: Look exposes upcoming bytes through *buf without consuming them, Skip consumes bytes previously exposed by Look. */
+typedef struct ILookInStream ILookInStream;
+struct ILookInStream
+{
+  SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);
+    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+       (output(*size) > input(*size)) is not allowed
+       (output(*size) < input(*size)) is allowed */
+  SRes (*Skip)(const ILookInStream *p, size_t offset);
+    /* offset must be <= output(*size) of Look */
+
+  SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
+    /* reads directly (without buffer). It's same as ISeqInStream::Read */
+  SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);
+};
+
+#define ILookInStream_Look(p, buf, size)   (p)->Look(p, buf, size)
+#define ILookInStream_Skip(p, offset)      (p)->Skip(p, offset)
+#define ILookInStream_Read(p, buf, size)   (p)->Read(p, buf, size)
+#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+/* LookInStream_LookRead copies up to *size bytes obtained via Look and then Skips them; LookInStream_SeekTo seeks to an absolute offset (SZ_SEEK_SET). */
+SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);
+SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);
+
+/* reads via ILookInStream::Read */
+SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);
+SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);
+
+
+/* ILookInStream implementation that buffers an ISeekInStream in a caller-supplied buffer. */
+typedef struct
+{
+  ILookInStream vt;
+  const ISeekInStream *realStream;
+ 
+  size_t pos; /* read offset inside buf; bytes pos .. size-1 are still unread */
+  size_t size; /* it's data size */
+  
+  /* the following variables must be set outside */
+  Byte *buf;
+  size_t bufSize;
+} CLookToRead2;
+
+void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
+
+#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
+
+/* ISeqInStream adapter over an ILookInStream; its Read goes through LookInStream_LookRead (Look + Skip). */
+typedef struct
+{
+  ISeqInStream vt;
+  const ILookInStream *realStream;
+} CSecToLook;
+
+void SecToLook_CreateVTable(CSecToLook *p);
+
+
+/* ISeqInStream adapter over an ILookInStream; its Read forwards to ILookInStream_Read. */
+typedef struct
+{
+  ISeqInStream vt;
+  const ILookInStream *realStream;
+} CSecToRead;
+
+void SecToRead_CreateVTable(CSecToRead *p);
+
+/* Progress callback interface; the callback can stop the operation through its return value. */
+typedef struct ICompressProgress ICompressProgress;
+
+struct ICompressProgress
+{
+  SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);
+    /* Returns: result. (result != SZ_OK) means break.
+       Value (UInt64)(Int64)-1 for size means unknown value. */
+};
+#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
+
+
+/* Allocator interface passed to the decoders as ISzAllocPtr. The folder decoder treats a null result from Alloc as failure only when the requested size is non-zero. */
+typedef struct ISzAlloc ISzAlloc;
+typedef const ISzAlloc * ISzAllocPtr;
+
+struct ISzAlloc
+{
+  void *(*Alloc)(ISzAllocPtr p, size_t size);
+  void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
+};
+
+#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
+#define ISzAlloc_Free(p, a) (p)->Free(p, a)
+
+/* deprecated */
+#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
+#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
+
+
+
+
+
+#ifndef MY_offsetof
+  #ifdef offsetof
+    #define MY_offsetof(type, m) offsetof(type, m)
+    /*
+    #define MY_offsetof(type, m) FIELD_OFFSET(type, m)
+    */
+  #else
+    #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))
+  #endif
+#endif
+
+
+
+#ifndef MY_container_of
+
+/*
+#define MY_container_of(ptr, type, m) container_of(ptr, type, m)
+#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
+#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
+#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
+*/
+
+/*
+  GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"
+    GCC 3.4.4 : classes with constructor
+    GCC 4.8.1 : classes with non-public variable members
+*/
+
+#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
+
+
+#endif
+
+#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr))
+
+/*
+#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+*/
+#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)
+
+#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+/*
+#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
+*/
+
+
+
+#ifdef _WIN32
+
+#define CHAR_PATH_SEPARATOR '\\'
+#define WCHAR_PATH_SEPARATOR L'\\'
+#define STRING_PATH_SEPARATOR "\\"
+#define WSTRING_PATH_SEPARATOR L"\\"
+
+#else
+
+#define CHAR_PATH_SEPARATOR '/'
+#define WCHAR_PATH_SEPARATOR L'/'
+#define STRING_PATH_SEPARATOR "/"
+#define WSTRING_PATH_SEPARATOR L"/"
+
+#endif
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/C/7zVersion.h b/libraries/lzma/C/7zVersion.h
new file mode 100644
index 000000000..c176823a4
--- /dev/null
+++ b/libraries/lzma/C/7zVersion.h
@@ -0,0 +1,27 @@
+#define MY_VER_MAJOR 19
+#define MY_VER_MINOR 00
+#define MY_VER_BUILD 0
+#define MY_VERSION_NUMBERS "19.00"
+#define MY_VERSION MY_VERSION_NUMBERS
+/* MY_VERSION_CPU is MY_VERSION, followed by the CPU name in parentheses when MY_CPU_NAME is defined. */
+#ifdef MY_CPU_NAME
+  #define MY_VERSION_CPU MY_VERSION " (" MY_CPU_NAME ")"
+#else
+  #define MY_VERSION_CPU MY_VERSION
+#endif
+
+#define MY_DATE "2019-02-21"
+#undef MY_COPYRIGHT
+#undef MY_VERSION_COPYRIGHT_DATE
+#define MY_AUTHOR_NAME "Igor Pavlov"
+#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
+#define MY_COPYRIGHT_CR "Copyright (c) 1999-2018 Igor Pavlov"
+/* MY_COPYRIGHT is the "Copyright (c) ..." string when USE_COPYRIGHT_CR is defined, otherwise the public-domain string. */
+#ifdef USE_COPYRIGHT_CR
+  #define MY_COPYRIGHT MY_COPYRIGHT_CR
+#else
+  #define MY_COPYRIGHT MY_COPYRIGHT_PD
+#endif
+
+#define MY_COPYRIGHT_DATE MY_COPYRIGHT " : " MY_DATE
+#define MY_VERSION_COPYRIGHT_DATE MY_VERSION_CPU " : " MY_COPYRIGHT " : " MY_DATE
diff --git a/libraries/lzma/C/Bcj2.c b/libraries/lzma/C/Bcj2.c
new file mode 100644
index 000000000..9a0046a65
--- /dev/null
+++ b/libraries/lzma/C/Bcj2.c
@@ -0,0 +1,257 @@
+/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
+2018-04-28 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Bcj2.h"
+#include "CpuArch.h"
+
+#define CProb UInt16
+
+#define kTopValue ((UInt32)1 << 24)
+#define kNumModelBits 11
+#define kBitModelTotal (1 << kNumModelBits)
+#define kNumMoveBits 5
+
+#define _IF_BIT_0 ttt = *prob; bound = (p->range >> kNumModelBits) * ttt; if (p->code < bound)
+#define _UPDATE_0 p->range = bound; *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+#define _UPDATE_1 p->range -= bound; p->code -= bound; *prob = (CProb)(ttt - (ttt >> kNumMoveBits));
+
+void Bcj2Dec_Init(CBcj2Dec *p)
+{
+  /* Reset the decoder; every probs[] entry starts at kBitModelTotal / 2. */
+  unsigned left = (unsigned)(sizeof(p->probs) / sizeof(p->probs[0]));
+  while (left != 0)
+    p->probs[--left] = kBitModelTotal >> 1;
+  p->code = 0;
+  p->range = 0;
+  p->temp[3] = 0;
+  p->ip = 0;
+  p->state = BCJ2_DEC_STATE_OK;
+}
+/* Decodes until an input stream is exhausted or dest is full; on SZ_OK p->state says which one. */
+SRes Bcj2Dec_Decode(CBcj2Dec *p)
+{
+  if (p->range <= 5)  /* still priming: range counts the RC bytes read so far */
+  {
+    p->state = BCJ2_DEC_STATE_OK;
+    for (; p->range != 5; p->range++)
+    {
+      if (p->range == 1 && p->code != 0)  /* the first RC byte must be 0 */
+        return SZ_ERROR_DATA;
+      
+      if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
+      {
+        p->state = BCJ2_STREAM_RC;  /* RC stream is empty; range keeps the priming progress */
+        return SZ_OK;
+      }
+
+      p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
+    }
+    
+    if (p->code == 0xFFFFFFFF)
+      return SZ_ERROR_DATA;
+    
+    p->range = 0xFFFFFFFF;
+  }
+  else if (p->state >= BCJ2_DEC_STATE_ORIG_0)
+  {
+    while (p->state <= BCJ2_DEC_STATE_ORIG_3)  /* flush the bytes of temp[] left over from an earlier call */
+    {
+      Byte *dest = p->dest;
+      if (dest == p->destLim)
+        return SZ_OK;
+      *dest = p->temp[(size_t)p->state - BCJ2_DEC_STATE_ORIG_0];
+      p->state++;
+      p->dest = dest + 1;
+    }
+  }
+
+  /*
+  if (BCJ2_IS_32BIT_STREAM(p->state))
+  {
+    const Byte *cur = p->bufs[p->state];
+    if (cur == p->lims[p->state])
+      return SZ_OK;
+    p->bufs[p->state] = cur + 4;
+    
+    {
+      UInt32 val;
+      Byte *dest;
+      SizeT rem;
+      
+      p->ip += 4;
+      val = GetBe32(cur) - p->ip;
+      dest = p->dest;
+      rem = p->destLim - dest;
+      if (rem < 4)
+      {
+        SizeT i;
+        SetUi32(p->temp, val);
+        for (i = 0; i < rem; i++)
+          dest[i] = p->temp[i];
+        p->dest = dest + rem;
+        p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
+        return SZ_OK;
+      }
+      SetUi32(dest, val);
+      p->temp[3] = (Byte)(val >> 24);
+      p->dest = dest + 4;
+      p->state = BCJ2_DEC_STATE_OK;
+    }
+  }
+  */
+
+  for (;;)
+  {
+    if (BCJ2_IS_32BIT_STREAM(p->state))  /* resuming after CALL/JUMP ran dry: skip to the 32-bit value below */
+      p->state = BCJ2_DEC_STATE_OK;
+    else
+    {
+      if (p->range < kTopValue)  /* normalize: shift one more RC byte into code */
+      {
+        if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
+        {
+          p->state = BCJ2_STREAM_RC;
+          return SZ_OK;
+        }
+        p->range <<= 8;
+        p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
+      }
+
+      {
+        const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
+        const Byte *srcLim;
+        Byte *dest;
+        SizeT num = p->lims[BCJ2_STREAM_MAIN] - src;
+        
+        if (num == 0)
+        {
+          p->state = BCJ2_STREAM_MAIN;
+          return SZ_OK;
+        }
+        
+        dest = p->dest;
+        if (num > (SizeT)(p->destLim - dest))  /* never scan more than dest can take */
+        {
+          num = p->destLim - dest;
+          if (num == 0)
+          {
+            p->state = BCJ2_DEC_STATE_ORIG;
+            return SZ_OK;
+          }
+        }
+       
+        srcLim = src + num;
+
+        if (p->temp[3] == 0x0F && (src[0] & 0xF0) == 0x80)  /* previous byte 0x0F followed by 0x8x: stop at once */
+          *dest = src[0];
+        else for (;;)  /* copy bytes until 0xE8, 0xE9 or 0x0F 0x8x is seen, or srcLim is reached */
+        {
+          Byte b = *src;
+          *dest = b;
+          if (b != 0x0F)
+          {
+            if ((b & 0xFE) == 0xE8)
+              break;
+            dest++;
+            if (++src != srcLim)
+              continue;
+            break;
+          }
+          dest++;
+          if (++src == srcLim)
+            break;
+          if ((*src & 0xF0) != 0x80)
+            continue;
+          *dest = *src;
+          break;
+        }
+        
+        num = src - p->bufs[BCJ2_STREAM_MAIN];
+        
+        if (src == srcLim)  /* the scanned run ended without a marker byte */
+        {
+          p->temp[3] = src[-1];
+          p->bufs[BCJ2_STREAM_MAIN] = src;
+          p->ip += (UInt32)num;
+          p->dest += num;
+          p->state =
+            p->bufs[BCJ2_STREAM_MAIN] ==
+            p->lims[BCJ2_STREAM_MAIN] ?
+              (unsigned)BCJ2_STREAM_MAIN :
+              (unsigned)BCJ2_DEC_STATE_ORIG;
+          return SZ_OK;
+        }
+        
+        {
+          UInt32 bound, ttt;
+          CProb *prob;
+          Byte b = src[0];
+          Byte prev = (Byte)(num == 0 ? p->temp[3] : src[-1]);
+          
+          p->temp[3] = b;
+          p->bufs[BCJ2_STREAM_MAIN] = src + 1;
+          num++;
+          p->ip += (UInt32)num;
+          p->dest += num;
+          
+          prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)prev : (b == 0xE9 ? 1 : 0));
+          
+          _IF_BIT_0  /* decoded bit 0: no converted value follows this marker byte */
+          {
+            _UPDATE_0
+            continue;
+          }
+          _UPDATE_1
+            
+        }
+      }
+    }
+
+    {
+      UInt32 val;
+      unsigned cj = (p->temp[3] == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
+      const Byte *cur = p->bufs[cj];
+      Byte *dest;
+      SizeT rem;
+      
+      if (cur == p->lims[cj])
+      {
+        p->state = cj;  /* the CALL or JUMP stream is empty */
+        break;
+      }
+      
+      val = GetBe32(cur);
+      p->bufs[cj] = cur + 4;
+
+      p->ip += 4;
+      val -= p->ip;  /* p->ip already includes the 4 bytes being produced */
+      dest = p->dest;
+      rem = p->destLim - dest;
+      
+      if (rem < 4)  /* dest cannot hold all 4 bytes: write what fits and keep all 4 in temp[] */
+      {
+        p->temp[0] = (Byte)val; if (rem > 0) dest[0] = (Byte)val; val >>= 8;
+        p->temp[1] = (Byte)val; if (rem > 1) dest[1] = (Byte)val; val >>= 8;
+        p->temp[2] = (Byte)val; if (rem > 2) dest[2] = (Byte)val; val >>= 8;
+        p->temp[3] = (Byte)val;
+        p->dest = dest + rem;
+        p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
+        break;
+      }
+      
+      SetUi32(dest, val);
+      p->temp[3] = (Byte)(val >> 24);
+      p->dest = dest + 4;
+    }
+  }
+
+  if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC])  /* one last normalization if RC data is at hand */
+  {
+    p->range <<= 8;
+    p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
+  }
+
+  return SZ_OK;
+}
diff --git a/libraries/lzma/C/Bcj2.h b/libraries/lzma/C/Bcj2.h
new file mode 100644
index 000000000..8824080ac
--- /dev/null
+++ b/libraries/lzma/C/Bcj2.h
@@ -0,0 +1,146 @@
+/* Bcj2.h -- BCJ2 Converter for x86 code
+2014-11-10 : Igor Pavlov : Public domain */
+
+#ifndef __BCJ2_H
+#define __BCJ2_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define BCJ2_NUM_STREAMS 4
+
+enum
+{
+  BCJ2_STREAM_MAIN,  /* bytes copied to the output and scanned for 0xE8 / 0xE9 / 0x0F 0x8x */
+  BCJ2_STREAM_CALL,  /* 32-bit big-endian values, read when the marker byte is 0xE8 */
+  BCJ2_STREAM_JUMP,  /* 32-bit big-endian values, read for the other marker bytes */
+  BCJ2_STREAM_RC     /* range-coder bytes */
+};
+
+enum
+{
+  BCJ2_DEC_STATE_ORIG_0 = BCJ2_NUM_STREAMS,  /* ORIG_n: temp[n..3] still has to be written to dest */
+  BCJ2_DEC_STATE_ORIG_1,
+  BCJ2_DEC_STATE_ORIG_2,
+  BCJ2_DEC_STATE_ORIG_3,
+  
+  BCJ2_DEC_STATE_ORIG,  /* set by Bcj2Dec_Decode when it stops because dest is full */
+  BCJ2_DEC_STATE_OK     /* initial state set by Bcj2Dec_Init */
+};
+
+enum
+{
+  BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS,
+  BCJ2_ENC_STATE_OK
+};
+
+
+#define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP)
+
+/*
+CBcj2Dec / CBcj2Enc
+bufs sizes:
+  BUF_SIZE(n) = lims[n] - bufs[n]
+bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be a multiple of 4:
+    (BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0
+    (BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0
+*/
+
+/*
+CBcj2Dec:
+dest is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions:
+  bufs[BCJ2_STREAM_MAIN] >= dest &&
+  bufs[BCJ2_STREAM_MAIN] - dest >= tempReserv +
+        BUF_SIZE(BCJ2_STREAM_CALL) +
+        BUF_SIZE(BCJ2_STREAM_JUMP)
+     tempReserv = 0 : for first call of Bcj2Dec_Decode
+     tempReserv = 4 : for any other calls of Bcj2Dec_Decode
+  overlap with offset = 1 is not allowed
+*/
+
+typedef struct
+{
+  const Byte *bufs[BCJ2_NUM_STREAMS];  /* current read position of each input stream */
+  const Byte *lims[BCJ2_NUM_STREAMS];  /* end of the available data of each input stream */
+  Byte *dest;                          /* current write position of the output */
+  const Byte *destLim;                 /* end of the output buffer */
+
+  unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_DEC_STATE_ORIG */
+
+  UInt32 ip;     /* advances together with dest; subtracted from the CALL/JUMP values */
+  Byte temp[4];  /* 4-byte value not yet fully written to dest; temp[3] also keeps the previous byte */
+  UInt32 range;  /* range-coder range; values 0..5 count the priming bytes read so far */
+  UInt32 code;   /* range-coder code value */
+  UInt16 probs[2 + 256];  /* bit probabilities: [0] other markers, [1] 0xE9, [2 + prev byte] 0xE8 */
+} CBcj2Dec;
+
+void Bcj2Dec_Init(CBcj2Dec *p);
+
+/* Returns: SZ_OK or SZ_ERROR_DATA */
+SRes Bcj2Dec_Decode(CBcj2Dec *p);
+
+#define Bcj2Dec_IsFinished(_p_) ((_p_)->code == 0)
+
+
+
+typedef enum
+{
+  BCJ2_ENC_FINISH_MODE_CONTINUE,
+  BCJ2_ENC_FINISH_MODE_END_BLOCK,
+  BCJ2_ENC_FINISH_MODE_END_STREAM
+} EBcj2Enc_FinishMode;
+
+typedef struct
+{
+  Byte *bufs[BCJ2_NUM_STREAMS];
+  const Byte *lims[BCJ2_NUM_STREAMS];
+  const Byte *src;
+  const Byte *srcLim;
+
+  unsigned state;
+  EBcj2Enc_FinishMode finishMode;
+
+  Byte prevByte;
+
+  Byte cache;
+  UInt32 range;
+  UInt64 low;
+  UInt64 cacheSize;
+
+  UInt32 ip;
+
+  /* 32-bit relative offset in JUMP/CALL commands is
+       - (mod 4 GB)   in 32-bit mode
+       - signed Int32 in 64-bit mode
+     We use (mod 4 GB) check for fileSize.
+     Use fileSize up to 2 GB, if you want to support 32-bit and 64-bit code conversion. */
+  UInt32 fileIp;
+  UInt32 fileSize;    /* (fileSize <= ((UInt32)1 << 31)), 0 means no_limit */
+  UInt32 relatLimit;  /* (relatLimit <= ((UInt32)1 << 31)), 0 means disable_conversion */
+
+  UInt32 tempTarget;
+  unsigned tempPos;
+  Byte temp[4 * 2];
+
+  unsigned flushPos;
+  
+  UInt16 probs[2 + 256];
+} CBcj2Enc;
+
+void Bcj2Enc_Init(CBcj2Enc *p);
+void Bcj2Enc_Encode(CBcj2Enc *p);
+
+#define Bcj2Enc_Get_InputData_Size(p) ((SizeT)((p)->srcLim - (p)->src) + (p)->tempPos)
+#define Bcj2Enc_IsFinished(p) ((p)->flushPos == 5)
+
+
+#define BCJ2_RELAT_LIMIT_NUM_BITS 26
+#define BCJ2_RELAT_LIMIT ((UInt32)1 << BCJ2_RELAT_LIMIT_NUM_BITS)
+
+/* limit for CBcj2Enc::fileSize variable */
+#define BCJ2_FileSize_MAX ((UInt32)1 << 31)
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/C/Bra.c b/libraries/lzma/C/Bra.c
new file mode 100644
index 000000000..aed17e330
--- /dev/null
+++ b/libraries/lzma/C/Bra.c
@@ -0,0 +1,230 @@
+/* Bra.c -- Converters for RISC code
+2017-04-04 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+#include "Bra.h"
+
+/* ARM_Convert -- branch converter for ARM code.
+
+   Examines every aligned little-endian 32-bit word of data.  Words whose
+   top byte is 0xEB get their low 24 bits rewritten in place; all other
+   words are left untouched.
+
+   In:
+     data     - buffer to convert
+     size     - number of bytes in data; a trailing partial word is ignored
+     ip       - base value for the address term (see below)
+     encoding - nonzero adds the address term, zero subtracts it
+
+   Returns:
+     The number of bytes processed, i.e. size rounded down to a multiple
+     of 4. */
+SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  SizeT pos;
+  const SizeT end = size & ~(size_t)3;
+
+  for (pos = 0; pos < end; pos += 4)
+  {
+    Byte *insn = data + pos;
+    UInt32 cur, v;
+
+    if (insn[3] != 0xEB)
+      continue;
+
+    /* Address term: ip + 8 + pos, which equals (ip + 4) plus the offset
+       of the first byte after this word. */
+    cur = ip + 8 + (UInt32)pos;
+
+    /* The 24-bit field counts words: scale it to bytes, apply the address
+       term, then scale back. */
+    v = GetUi32(insn);
+    v <<= 2;
+    if (encoding)
+      v += cur;
+    else
+      v -= cur;
+    v >>= 2;
+
+    /* Whatever ended up above bit 23 is discarded; the top byte is set
+       back to 0xEB. */
+    v &= 0x00FFFFFF;
+    v |= 0xEB000000;
+    SetUi32(insn, v);
+  }
+
+  /* The loop only leaves through its condition, so pos == end: every
+     whole word was visited. */
+  return pos;
+}
+
+
+/* Branch converter for ARM Thumb code.  Scans data in 2-byte steps for a
+   pair of halfwords whose high bytes are 0xF0..0xF7 and 0xF8..0xFF, and
+   adds (encoding != 0) or subtracts (encoding == 0) the address term to/from
+   the 22-bit value split across the pair, in place.  Returns the number of
+   bytes processed; a tail of fewer than 4 bytes is never examined. */
+SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  Byte *p;
+  const Byte *lim;
+  size &= ~(size_t)1;
+  p = data;
+  lim = data + size;
+  /* lim is the end of the data, not (end - 4): that pointer would lie before data when size < 4. */
+  if (encoding)
+  
+  for (;;)
+  {
+    UInt32 b1;
+    for (;;)
+    {
+      UInt32 b3;
+      if ((SizeT)(lim - p) < 4)  /* p never passes lim, so the difference is non-negative */
+        return p - data;
+      b1 = p[1];
+      b3 = p[3];
+      p += 2;
+      b1 ^= 8;  /* maps 0xF0..0xF7 onto 0xF8..0xFF so one AND tests both high bytes */
+      if ((b3 & b1) >= 0xF8)
+        break;
+    }
+    {
+      UInt32 v =
+             ((UInt32)b1 << 19)
+          + (((UInt32)p[1] & 0x7) << 8)
+          + (((UInt32)p[-2] << 11))
+          + (p[0]);
+
+      p += 2;  /* p now points just past the halfword pair */
+      {
+        UInt32 cur = (ip + (UInt32)(p - data)) >> 1;  /* address term in halfword units */
+          v += cur;
+      }
+
+      p[-4] = (Byte)(v >> 11);
+      p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
+      p[-2] = (Byte)v;
+      p[-1] = (Byte)(0xF8 | (v >> 8));
+    }
+  }
+  
+  for (;;)  /* decoding: same scan, the address term is subtracted */
+  {
+    UInt32 b1;
+    for (;;)
+    {
+      UInt32 b3;
+      if ((SizeT)(lim - p) < 4)
+        return p - data;
+      b1 = p[1];
+      b3 = p[3];
+      p += 2;
+      b1 ^= 8;
+      if ((b3 & b1) >= 0xF8)
+        break;
+    }
+    {
+      UInt32 v =
+             ((UInt32)b1 << 19)
+          + (((UInt32)p[1] & 0x7) << 8)
+          + (((UInt32)p[-2] << 11))
+          + (p[0]);
+
+      p += 2;
+      {
+        UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
+          v -= cur;
+      }
+
+      p[-4] = (Byte)(v >> 11);
+      p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
+      p[-2] = (Byte)v;
+      p[-1] = (Byte)(0xF8 | (v >> 8));
+    }
+  }
+}
+
+
+/* Branch converter for PowerPC code.  Every aligned big-endian word with
+   (word & 0xFC000003) == 0x48000001 has ip plus its own offset in data
+   added (encoding != 0) or subtracted (encoding == 0), in place; only the
+   low 26 bits of the result are kept.  Returns the number of bytes
+   processed, i.e. size rounded down to a multiple of 4. */
+SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  SizeT pos;
+  const SizeT end = size & ~(size_t)3;
+
+  for (pos = 0; pos < end; pos += 4)
+  {
+    Byte *insn = data + pos;
+    UInt32 cur, v;
+
+    /* if ((v & 0xFC000003) == 0x48000001) */
+    if ((insn[0] & 0xFC) != 0x48 || (insn[3] & 3) != 1)
+      continue;
+
+    /* (ip - 4) + (pos + 4): ip plus the offset of this word. */
+    cur = ip + (UInt32)pos;
+    v = GetBe32(insn);
+    if (encoding)
+      v += cur;
+    else
+      v -= cur;
+    v &= 0x03FFFFFF;
+    v |= 0x48000000;
+    SetBe32(insn, v);
+  }
+  return pos;
+}
+
+/* Branch converter for SPARC code: rewrites matching big-endian 32-bit words in place; returns bytes processed. */
+SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  Byte *p;
+  const Byte *lim;
+  size &= ~(size_t)3;  /* only whole 4-byte words are examined */
+  ip -= 4;             /* p is advanced past a word before use, so ip + (p - data) is ip + the word's offset */
+  p = data;
+  lim = data + size;
+
+  for (;;)
+  {
+    for (;;)
+    {
+      if (p >= lim)
+        return p - data;
+      /*
+      v = GetBe32(p);
+      p += 4;
+      m = v + ((UInt32)5 << 29);
+      m ^= (UInt32)7 << 29;
+      m += (UInt32)1 << 22;
+      if ((m & ((UInt32)0x1FF << 23)) == 0)
+        break;
+      */
+      p += 4;
+      if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) ||  /* first bytes 0x40, 00xxxxxx */
+          (p[-4] == 0x7F && (p[-3] >= 0xC0)))        /* first bytes 0x7F, 11xxxxxx */
+        break;
+    }
+    {
+      UInt32 v = GetBe32(p - 4);
+      v <<= 2;
+      if (encoding)
+        v += ip + (UInt32)(p - data);
+      else
+        v -= ip + (UInt32)(p - data);
+      
+      v &= 0x01FFFFFF;
+      v -= (UInt32)1 << 24;
+      v ^= 0xFF000000;  /* with the subtraction above: copies bit 24 into bits 25..31 */
+      v >>= 2;
+      v |= 0x40000000;  /* the result again starts with 0x40, 00xxxxxx or 0x7F, 11xxxxxx */
+      SetBe32(p - 4, v);
+    }
+  }
+}
diff --git a/libraries/lzma/C/Bra.h b/libraries/lzma/C/Bra.h
new file mode 100644
index 000000000..855e37a6b
--- /dev/null
+++ b/libraries/lzma/C/Bra.h
@@ -0,0 +1,64 @@
+/* Bra.h -- Branch converters for executables
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __BRA_H
+#define __BRA_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/*
+These functions convert relative addresses to absolute addresses
+in CALL instructions to increase the compression ratio.
+  
+  In:
+    data     - data buffer
+    size     - size of data
+    ip       - current virtual Instruction Pointer (IP) value
+    state    - state variable for x86 converter
+    encoding - 0 (for decoding), 1 (for encoding)
+  
+  Out:
+    state    - state variable for x86 converter
+
+  Returns:
+    The number of processed bytes. If you call these functions with multiple calls,
+    you must start next call with first byte after block of processed bytes.
+  
+  Type   Endian  Alignment  LookAhead
+  
+  x86    little      1          4
+  ARMT   little      2          2
+  ARM    little      4          0
+  PPC     big        4          0
+  SPARC   big        4          0
+  IA64   little     16          0
+
+  size must be >= Alignment + LookAhead, if it's not last block.
+  If (size < Alignment + LookAhead), converter returns 0.
+
+  Example:
+
+    UInt32 ip = 0;
+    for ()
+    {
+      ; size must be >= Alignment + LookAhead, if it's not last block
+      SizeT processed = Convert(data, size, ip, 1);
+      data += processed;
+      size -= processed;
+      ip += processed;
+    }
+*/
+
+#define x86_Convert_Init(state) { state = 0; }
+SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
+SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/C/Bra86.c b/libraries/lzma/C/Bra86.c
new file mode 100644
index 000000000..93ed4d762
--- /dev/null
+++ b/libraries/lzma/C/Bra86.c
@@ -0,0 +1,82 @@
+/* Bra86.c -- Converter for x86 code (BCJ)
+2017-04-03 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Bra.h"
+
+#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0)
+/* BCJ filter for x86: converts the 32-bit operands that follow 0xE8 / 0xE9 bytes, in place; returns bytes processed. */
+SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)
+{
+  SizeT pos = 0;
+  UInt32 mask = *state & 7;  /* only the low 3 bits of *state are carried between calls */
+  if (size < 5)  /* an opcode byte plus its 4 operand bytes are needed */
+    return 0;
+  size -= 4;  /* size now bounds the opcode position, so p[1..4] stay inside the buffer */
+  ip += 5;    /* ip + pos is then the original ip plus the offset just past a 5-byte instruction at pos */
+
+  for (;;)
+  {
+    Byte *p = data + pos;
+    const Byte *limit = data + size;
+    for (; p < limit; p++)  /* find the next 0xE8 or 0xE9 byte */
+      if ((*p & 0xFE) == 0xE8)
+        break;
+
+    {
+      SizeT d = (SizeT)(p - data - pos);  /* bytes skipped since the previous position */
+      pos = (SizeT)(p - data);
+      if (p >= limit)
+      {
+        *state = (d > 2 ? 0 : mask >> (unsigned)d);  /* save the shifted mask for the next call */
+        return pos;
+      }
+      if (d > 2)
+        mask = 0;
+      else
+      {
+        mask >>= (unsigned)d;
+        if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1])))
+        {
+          mask = (mask >> 1) | 4;  /* skip this opcode byte without converting it */
+          pos++;
+          continue;
+        }
+      }
+    }
+
+    if (Test86MSByte(p[4]))  /* convert only when the operand's top byte is 0x00 or 0xFF */
+    {
+      UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);
+      UInt32 cur = ip + (UInt32)pos;
+      pos += 5;
+      if (encoding)
+        v += cur;
+      else
+        v -= cur;
+      if (mask != 0)
+      {
+        unsigned sh = (mask & 6) << 2;  /* 0, 8, 16 or 24: selects one byte of v */
+        if (Test86MSByte((Byte)(v >> sh)))
+        {
+          v ^= (((UInt32)0x100 << sh) - 1);
+          if (encoding)
+            v += cur;
+          else
+            v -= cur;
+        }
+        mask = 0;
+      }
+      p[1] = (Byte)v;
+      p[2] = (Byte)(v >> 8);
+      p[3] = (Byte)(v >> 16);
+      p[4] = (Byte)(0 - ((v >> 24) & 1));  /* top byte becomes 0x00 or 0xFF according to bit 24 */
+    }
+    else
+    {
+      mask = (mask >> 1) | 4;
+      pos++;
+    }
+  }
+}
diff --git a/libraries/lzma/C/BraIA64.c b/libraries/lzma/C/BraIA64.c
new file mode 100644
index 000000000..d1dbc62c5
--- /dev/null
+++ b/libraries/lzma/C/BraIA64.c
@@ -0,0 +1,53 @@
+/* BraIA64.c -- Converter for IA-64 code
+2017-01-26 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+#include "Bra.h"
+/* Branch converter for IA-64 code: processes data in 16-byte units, in place; returns bytes processed. */
+SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  SizeT i;
+  if (size < 16)
+    return 0;
+  size -= 16;  /* size is now the last offset at which a whole 16-byte unit can start */
+  i = 0;
+  do
+  {
+    unsigned m = ((UInt32)0x334B0000 >> (data[i] & 0x1E)) & 3;  /* 2-bit table entry selected by bits 1..4 of the first byte */
+    if (m)
+    {
+      m++;
+      do  /* m runs from its start value up to 4 */
+      {
+        Byte *p = data + (i + (size_t)m * 5 - 8);
+        if (((p[3] >> m) & 15) == 5
+            && (((p[-1] | ((UInt32)p[0] << 8)) >> m) & 0x70) == 0)
+        {
+          unsigned raw = GetUi32(p);
+          unsigned v = raw >> m;
+          v = (v & 0xFFFFF) | ((v & (1 << 23)) >> 3);  /* 20 low bits plus bit 23 form a 21-bit value */
+          
+          v <<= 4;  /* the value is in 16-byte units: scale to bytes around the address arithmetic */
+          if (encoding)
+            v += ip + (UInt32)i;
+          else
+            v -= ip + (UInt32)i;
+          v >>= 4;
+          
+          v &= 0x1FFFFF;
+          v += 0x700000;
+          v &= 0x8FFFFF;  /* with the addition above: moves bit 20 back to bit 23 */
+          raw &= ~((UInt32)0x8FFFFF << m);
+          raw |= (v << m);
+          SetUi32(p, raw);
+        }
+      }
+      while (++m <= 4);
+    }
+    i += 16;
+  }
+  while (i <= size);
+  return i;
+}
diff --git a/libraries/lzma/C/Compiler.h b/libraries/lzma/C/Compiler.h
new file mode 100644
index 000000000..0cc409d8a
--- /dev/null
+++ b/libraries/lzma/C/Compiler.h
@@ -0,0 +1,33 @@
+/* Compiler.h
+2017-04-03 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_COMPILER_H
+#define __7Z_COMPILER_H
+
+#ifdef _MSC_VER
+
+  #ifdef UNDER_CE
+    #define RPC_NO_WINDOWS_H
+    /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
+    #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
+    #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
+  #endif
+
+  #if _MSC_VER >= 1300
+    #pragma warning(disable : 4996) // This function or variable may be unsafe
+  #else
+    #pragma warning(disable : 4511) // copy constructor could not be generated
+    #pragma warning(disable : 4512) // assignment operator could not be generated
+    #pragma warning(disable : 4514) // unreferenced inline function has been removed
+    #pragma warning(disable : 4702) // unreachable code
+    #pragma warning(disable : 4710) // not inlined
+    #pragma warning(disable : 4714) // function marked as __forceinline not inlined
+    #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
+  #endif
+
+#endif
+
+#define UNUSED_VAR(x) (void)x;
+/* #define UNUSED_VAR(x) x=x; */
+
+#endif
diff --git a/libraries/lzma/C/CpuArch.c b/libraries/lzma/C/CpuArch.c
new file mode 100644
index 000000000..02e482e08
--- /dev/null
+++ b/libraries/lzma/C/CpuArch.c
@@ -0,0 +1,218 @@
+/* CpuArch.c -- CPU specific code
+2018-02-18: Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+
+#ifdef MY_CPU_X86_OR_AMD64
+
+#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)
+#define USE_ASM
+#endif
+
+#if !defined(USE_ASM) && _MSC_VER >= 1500
+#include <intrin.h>
+#endif
+
+#if defined(USE_ASM) && !defined(MY_CPU_AMD64)
+static UInt32 CheckFlag(UInt32 flag)  /* returns the bits of flag that could be toggled in EFLAGS */
+{
+  #ifdef _MSC_VER
+  __asm pushfd;
+  __asm pop EAX;
+  __asm mov EDX, EAX;
+  __asm xor EAX, flag;
+  __asm push EAX;
+  __asm popfd;
+  __asm pushfd;
+  __asm pop EAX;
+  __asm xor EAX, EDX;
+  __asm push EDX;
+  __asm popfd;
+  __asm and flag, EAX;
+  #else  /* same sequence in GCC syntax: save flags, flip the bits, read back, restore, mask */
+  __asm__ __volatile__ (
+    "pushf\n\t"
+    "pop  %%EAX\n\t"
+    "movl %%EAX,%%EDX\n\t"
+    "xorl %0,%%EAX\n\t"
+    "push %%EAX\n\t"
+    "popf\n\t"
+    "pushf\n\t"
+    "pop  %%EAX\n\t"
+    "xorl %%EDX,%%EAX\n\t"
+    "push %%EDX\n\t"
+    "popf\n\t"
+    "andl %%EAX, %0\n\t":
+    "=c" (flag) : "c" (flag) :
+    "%eax", "%edx");
+  #endif
+  return flag;  /* flag & (flags after the attempt ^ flags before) */
+}
+#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;
+#else
+#define CHECK_CPUID_IS_SUPPORTED
+#endif
+/* Executes CPUID with EAX = function and stores the resulting EAX, EBX, ECX, EDX in *a, *b, *c, *d. */
+void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
+{
+  #ifdef USE_ASM
+
+  #ifdef _MSC_VER  /* MSVC inline asm: EBX, ECX and EDX are zeroed before CPUID */
+
+  UInt32 a2, b2, c2, d2;
+  __asm xor EBX, EBX;
+  __asm xor ECX, ECX;
+  __asm xor EDX, EDX;
+  __asm mov EAX, function;
+  __asm cpuid;
+  __asm mov a2, EAX;
+  __asm mov b2, EBX;
+  __asm mov c2, ECX;
+  __asm mov d2, EDX;
+
+  *a = a2;
+  *b = b2;
+  *c = c2;
+  *d = d2;
+
+  #else  /* GCC-style asm: only EAX is an input here, ECX is not initialised */
+
+  __asm__ __volatile__ (
+  #if defined(MY_CPU_AMD64) && defined(__PIC__)  /* PIC: RBX is parked in RDI and swapped back afterwards */
+    "mov %%rbx, %%rdi;"
+    "cpuid;"
+    "xchg %%rbx, %%rdi;"
+    : "=a" (*a) ,
+      "=D" (*b) ,
+  #elif defined(MY_CPU_X86) && defined(__PIC__)  /* same trick with EBX / EDI */
+    "mov %%ebx, %%edi;"
+    "cpuid;"
+    "xchgl %%ebx, %%edi;"
+    : "=a" (*a) ,
+      "=D" (*b) ,
+  #else
+    "cpuid"
+    : "=a" (*a) ,
+      "=b" (*b) ,
+  #endif
+      "=c" (*c) ,
+      "=d" (*d)
+    : "0" (function)) ;
+
+  #endif
+  
+  #else  /* no inline asm: use the __cpuid intrinsic */
+
+  int CPUInfo[4];
+  __cpuid(CPUInfo, function);
+  *a = CPUInfo[0];
+  *b = CPUInfo[1];
+  *c = CPUInfo[2];
+  *d = CPUInfo[3];
+
+  #endif
+}
+/* Fills *p from CPUID functions 0 and 1; returns False only if CHECK_CPUID_IS_SUPPORTED rejects the CPU. */
+BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
+{
+  CHECK_CPUID_IS_SUPPORTED  /* expands to nothing unless the 32-bit CheckFlag() variant is compiled */
+  MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);  /* vendor[] = EBX, EDX, ECX */
+  MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);
+  return True;
+}
+
+static const UInt32 kVendors[][3] =  /* vendor strings as three little-endian words; row order matches CPU_FIRM_* */
+{
+  { 0x756E6547, 0x49656E69, 0x6C65746E},  /* "GenuineIntel" */
+  { 0x68747541, 0x69746E65, 0x444D4163},  /* "AuthenticAMD" */
+  { 0x746E6543, 0x48727561, 0x736C7561}   /* "CentaurHauls" */
+};
+
+/* Returns the kVendors row index whose three words equal p->vendor, or -1. */
+int x86cpuid_GetFirm(const Cx86cpuid *p)
+{
+  const unsigned numFirms = sizeof(kVendors) / sizeof(kVendors[0]);
+  unsigned firm = 0;
+  for (; firm != numFirms; firm++)
+  {
+    const UInt32 *known = kVendors[firm];
+    if (known[0] == p->vendor[0] && known[1] == p->vendor[1] && known[2] == p->vendor[2])
+      return (int)firm;
+  }
+  return -1;
+}
+
+/* True if CPUID is unusable, the vendor is unknown, or family/model match the lists below. */
+BoolInt CPU_Is_InOrder()
+{
+  Cx86cpuid info;
+  UInt32 family, model;
+  if (!x86cpuid_CheckAndRead(&info))
+    return True;
+  family = x86cpuid_GetFamily(info.ver);
+  model = x86cpuid_GetModel(info.ver);
+
+  switch (x86cpuid_GetFirm(&info))
+  {
+    case CPU_FIRM_INTEL:
+      if (family != 6)
+        return (family < 6);
+      /* In-Order Atom CPU */
+      return (model == 0x1C  /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */
+           || model == 0x26  /* 45 nm, Z6xx */
+           || model == 0x27  /* 32 nm, Z2460 */
+           || model == 0x35  /* 32 nm, Z2760 */
+           || model == 0x36); /* 32 nm, N2xxx, D2xxx */
+    case CPU_FIRM_AMD:
+      return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
+    case CPU_FIRM_VIA:
+      return (family < 6 || (family == 6 && model < 0xF));
+  }
+  return True;
+}
+
+#if !defined(MY_CPU_AMD64) && defined(_WIN32)
+#include <windows.h>
+static BoolInt CPU_Sys_Is_SSE_Supported()  /* True when GetVersionEx reports major version 5 or later */
+{
+  OSVERSIONINFO vi;
+  vi.dwOSVersionInfoSize = sizeof(vi);  /* the size field is set before the call */
+  if (!GetVersionEx(&vi))
+    return False;
+  return (vi.dwMajorVersion >= 5);
+}
+#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
+#else
+#define CHECK_SYS_SSE_SUPPORT
+#endif
+/* Returns bit 25 of ECX from CPUID function 1, or False if the OS or CPUID check fails. */
+BoolInt CPU_Is_Aes_Supported()
+{
+  Cx86cpuid p;
+  CHECK_SYS_SSE_SUPPORT  /* 32-bit Windows builds only: returns False if CPU_Sys_Is_SSE_Supported() fails */
+  if (!x86cpuid_CheckAndRead(&p))
+    return False;
+  return (p.c >> 25) & 1;
+}
+/* Returns bit 26 of EDX from CPUID function 0x80000001, or False if that function is not available. */
+BoolInt CPU_IsSupported_PageGB()
+{
+  Cx86cpuid cpuid;
+  if (!x86cpuid_CheckAndRead(&cpuid))
+    return False;
+  {
+    UInt32 d[4] = { 0 };
+    MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]);
+    if (d[0] < 0x80000001)  /* EAX of function 0x80000000 is compared against the function number needed below */
+      return False;
+  }
+  {
+    UInt32 d[4] = { 0 };
+    MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);
+    return (d[3] >> 26) & 1;
+  }
+}
+
+#endif
diff --git a/libraries/lzma/C/CpuArch.h b/libraries/lzma/C/CpuArch.h
new file mode 100644
index 000000000..bd4293880
--- /dev/null
+++ b/libraries/lzma/C/CpuArch.h
@@ -0,0 +1,336 @@
+/* CpuArch.h -- CPU specific code
+2018-02-18 : Igor Pavlov : Public domain */
+
+#ifndef __CPU_ARCH_H
+#define __CPU_ARCH_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/*
+MY_CPU_LE means that CPU is LITTLE ENDIAN.
+MY_CPU_BE means that CPU is BIG ENDIAN.
+If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
+
+MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
+*/
+
+#if  defined(_M_X64) \
+  || defined(_M_AMD64) \
+  || defined(__x86_64__) \
+  || defined(__AMD64__) \
+  || defined(__amd64__)
+  #define MY_CPU_AMD64
+  #ifdef __ILP32__
+    #define MY_CPU_NAME "x32"
+  #else
+    #define MY_CPU_NAME "x64"
+  #endif
+  #define MY_CPU_64BIT
+#endif
+
+
+#if  defined(_M_IX86) \
+  || defined(__i386__)
+  #define MY_CPU_X86
+  #define MY_CPU_NAME "x86"
+  #define MY_CPU_32BIT
+#endif
+
+
+#if  defined(_M_ARM64) \
+  || defined(__AARCH64EL__) \
+  || defined(__AARCH64EB__) \
+  || defined(__aarch64__)
+  #define MY_CPU_ARM64
+  #define MY_CPU_NAME "arm64"
+  #define MY_CPU_64BIT
+#endif
+
+
+#if  defined(_M_ARM) \
+  || defined(_M_ARM_NT) \
+  || defined(_M_ARMT) \
+  || defined(__arm__) \
+  || defined(__thumb__) \
+  || defined(__ARMEL__) \
+  || defined(__ARMEB__) \
+  || defined(__THUMBEL__) \
+  || defined(__THUMBEB__)
+  #define MY_CPU_ARM
+  #define MY_CPU_NAME "arm"
+  #define MY_CPU_32BIT
+#endif
+
+
+#if  defined(_M_IA64) \
+  || defined(__ia64__)
+  #define MY_CPU_IA64
+  #define MY_CPU_NAME "ia64"
+  #define MY_CPU_64BIT
+#endif
+
+
+#if  defined(__mips64) \
+  || defined(__mips64__) \
+  || (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3))
+  #define MY_CPU_NAME "mips64"
+  #define MY_CPU_64BIT
+#elif defined(__mips__)
+  #define MY_CPU_NAME "mips"
+  /* #define MY_CPU_32BIT */
+#endif
+
+
+#if  defined(__ppc64__) \
+  || defined(__powerpc64__)
+  #ifdef __ILP32__
+    #define MY_CPU_NAME "ppc64-32"
+  #else
+    #define MY_CPU_NAME "ppc64"
+  #endif
+  #define MY_CPU_64BIT
+#elif defined(__ppc__) \
+  || defined(__powerpc__)
+  #define MY_CPU_NAME "ppc"
+  #define MY_CPU_32BIT
+#endif
+
+
+#if  defined(__sparc64__)
+  #define MY_CPU_NAME "sparc64"
+  #define MY_CPU_64BIT
+#elif defined(__sparc__)
+  #define MY_CPU_NAME "sparc"
+  /* #define MY_CPU_32BIT */
+#endif
+
+
+#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
+#define MY_CPU_X86_OR_AMD64
+#endif
+
+
+#ifdef _WIN32
+
+  #ifdef MY_CPU_ARM
+  #define MY_CPU_ARM_LE
+  #endif
+
+  #ifdef MY_CPU_ARM64
+  #define MY_CPU_ARM64_LE
+  #endif
+
+  #ifdef _M_IA64
+  #define MY_CPU_IA64_LE
+  #endif
+
+#endif
+
+
+#if defined(MY_CPU_X86_OR_AMD64) \
+    || defined(MY_CPU_ARM_LE) \
+    || defined(MY_CPU_ARM64_LE) \
+    || defined(MY_CPU_IA64_LE) \
+    || defined(__LITTLE_ENDIAN__) \
+    || defined(__ARMEL__) \
+    || defined(__THUMBEL__) \
+    || defined(__AARCH64EL__) \
+    || defined(__MIPSEL__) \
+    || defined(__MIPSEL) \
+    || defined(_MIPSEL) \
+    || defined(__BFIN__) \
+    || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+  #define MY_CPU_LE
+#endif
+
+#if defined(__BIG_ENDIAN__) \
+    || defined(__ARMEB__) \
+    || defined(__THUMBEB__) \
+    || defined(__AARCH64EB__) \
+    || defined(__MIPSEB__) \
+    || defined(__MIPSEB) \
+    || defined(_MIPSEB) \
+    || defined(__m68k__) \
+    || defined(__s390__) \
+    || defined(__s390x__) \
+    || defined(__zarch__) \
+    || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
+  #define MY_CPU_BE
+#endif
+
+
+#if defined(MY_CPU_LE) && defined(MY_CPU_BE)
+  #error Stop_Compiling_Bad_Endian
+#endif
+
+
+#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
+  #error Stop_Compiling_Bad_32_64_BIT
+#endif
+
+
+#ifndef MY_CPU_NAME
+  #ifdef MY_CPU_LE
+    #define MY_CPU_NAME "LE"
+  #elif defined(MY_CPU_BE)
+    #define MY_CPU_NAME "BE"
+  #else
+    /*
+    #define MY_CPU_NAME ""
+    */
+  #endif
+#endif
+
+
+
+
+
+#ifdef MY_CPU_LE
+  #if defined(MY_CPU_X86_OR_AMD64) \
+      || defined(MY_CPU_ARM64) \
+      || defined(__ARM_FEATURE_UNALIGNED)
+    #define MY_CPU_LE_UNALIGN
+  #endif
+#endif
+
+
+#ifdef MY_CPU_LE_UNALIGN
+
+#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
+#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
+#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
+
+#define SetUi16(p, v) { *(UInt16 *)(p) = (v); }
+#define SetUi32(p, v) { *(UInt32 *)(p) = (v); }
+#define SetUi64(p, v) { *(UInt64 *)(p) = (v); }
+
+#else
+
+#define GetUi16(p) ( (UInt16) ( \
+             ((const Byte *)(p))[0] | \
+    ((UInt16)((const Byte *)(p))[1] << 8) ))
+
+#define GetUi32(p) ( \
+             ((const Byte *)(p))[0]        | \
+    ((UInt32)((const Byte *)(p))[1] <<  8) | \
+    ((UInt32)((const Byte *)(p))[2] << 16) | \
+    ((UInt32)((const Byte *)(p))[3] << 24))
+
+#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
+
+#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+    _ppp_[0] = (Byte)_vvv_; \
+    _ppp_[1] = (Byte)(_vvv_ >> 8); }
+
+#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+    _ppp_[0] = (Byte)_vvv_; \
+    _ppp_[1] = (Byte)(_vvv_ >> 8); \
+    _ppp_[2] = (Byte)(_vvv_ >> 16); \
+    _ppp_[3] = (Byte)(_vvv_ >> 24); }
+
+#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
+    SetUi32(_ppp2_    , (UInt32)_vvv2_); \
+    SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
+
+#endif
+
+#ifdef __has_builtin
+  #define MY__has_builtin(x) __has_builtin(x)
+#else
+  #define MY__has_builtin(x) 0
+#endif
+
+#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300)
+
+/* Note: we use bswap instruction, that is unsupported in 386 cpu */
+
+#include <stdlib.h>
+
+#pragma intrinsic(_byteswap_ushort)
+#pragma intrinsic(_byteswap_ulong)
+#pragma intrinsic(_byteswap_uint64)
+
+/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
+#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p))
+#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p))
+
+#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
+
+#elif defined(MY_CPU_LE_UNALIGN) && ( \
+       (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
+    || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
+
+/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */
+#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p))
+#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p))
+
+#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
+
+#else
+
+#define GetBe32(p) ( \
+    ((UInt32)((const Byte *)(p))[0] << 24) | \
+    ((UInt32)((const Byte *)(p))[1] << 16) | \
+    ((UInt32)((const Byte *)(p))[2] <<  8) | \
+             ((const Byte *)(p))[3] )
+
+#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
+
+#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+    _ppp_[0] = (Byte)(_vvv_ >> 24); \
+    _ppp_[1] = (Byte)(_vvv_ >> 16); \
+    _ppp_[2] = (Byte)(_vvv_ >> 8); \
+    _ppp_[3] = (Byte)_vvv_; }
+
+#endif
+
+
+#ifndef GetBe16
+
+#define GetBe16(p) ( (UInt16) ( \
+    ((UInt16)((const Byte *)(p))[0] << 8) | \
+             ((const Byte *)(p))[1] ))
+
+#endif
+
+
+
+#ifdef MY_CPU_X86_OR_AMD64
+
+typedef struct
+{
+  UInt32 maxFunc;    /* EAX of CPUID function 0 */
+  UInt32 vendor[3];  /* EBX, EDX, ECX of CPUID function 0, in that order */
+  UInt32 ver;        /* EAX of CPUID function 1 */
+  UInt32 b;          /* EBX of CPUID function 1 */
+  UInt32 c;          /* ECX of CPUID function 1 */
+  UInt32 d;          /* EDX of CPUID function 1 */
+} Cx86cpuid;
+
+enum
+{
+  CPU_FIRM_INTEL,  /* values are the row indexes of kVendors[] in CpuArch.c */
+  CPU_FIRM_AMD,
+  CPU_FIRM_VIA
+};
+
+void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
+
+BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);
+int x86cpuid_GetFirm(const Cx86cpuid *p);
+/* Field extractors for Cx86cpuid::ver; the argument is parenthesized so any expression can be passed. */
+#define x86cpuid_GetFamily(ver) ((((ver) >> 16) & 0xFF0) | (((ver) >> 8) & 0xF))
+#define x86cpuid_GetModel(ver)  ((((ver) >> 12) &  0xF0) | (((ver) >> 4) & 0xF))
+#define x86cpuid_GetStepping(ver) ((ver) & 0xF)
+
+BoolInt CPU_Is_InOrder();
+BoolInt CPU_Is_Aes_Supported();
+BoolInt CPU_IsSupported_PageGB();
+
+#endif
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/C/Delta.c b/libraries/lzma/C/Delta.c
new file mode 100644
index 000000000..e3edd21ed
--- /dev/null
+++ b/libraries/lzma/C/Delta.c
@@ -0,0 +1,64 @@
+/* Delta.c -- Delta converter
+2009-05-26 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Delta.h"
+
+/* Clears the filter history: sets all DELTA_STATE_SIZE bytes of state to 0. */
+void Delta_Init(Byte *state)
+{
+  unsigned left = DELTA_STATE_SIZE;
+  while (left-- != 0) *state++ = 0;
+}
+
+/* Copies size bytes from src to dest one at a time, lowest address first. */
+static void MyMemCpy(Byte *dest, const Byte *src, unsigned size)
+{
+  unsigned left = size;
+  for (; left != 0; left--) *dest++ = *src++;
+}
+
+/* Delta-encodes data[0..size) in place: each byte is replaced by (byte - the byte `delta`
+   positions earlier).  state holds that earlier history, oldest byte first, and is updated. */
+void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size)
+{
+  Byte hist[DELTA_STATE_SIZE];
+  unsigned k = 0;
+  SizeT n = 0;
+  MyMemCpy(hist, state, delta);
+  while (n < size)
+  {
+    /* hist is used as a ring of `delta` slots; every pass restarts at slot 0 */
+    for (k = 0; k < delta && n < size; k++, n++)
+    {
+      const Byte orig = data[n];
+      data[n] = (Byte)(orig - hist[k]);
+      hist[k] = orig;
+    }
+  }
+  k = (k == delta) ? 0 : k;  /* slot k is the oldest one; rotate so state starts with it */
+  MyMemCpy(state, hist + k, delta - k);
+  MyMemCpy(state + delta - k, hist, k);
+}
+
+/* Inverse of Delta_Encode: rebuilds data[0..size) in place by adding back the byte `delta`
+   positions earlier.  state is read and rewritten the same way (oldest byte first). */
+void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size)
+{
+  Byte hist[DELTA_STATE_SIZE];
+  unsigned k = 0;
+  SizeT n = 0;
+  MyMemCpy(hist, state, delta);
+  while (n < size)
+  {
+    for (k = 0; k < delta && n < size; k++, n++)
+    {
+      const Byte restored = (Byte)(hist[k] + data[n]);
+      hist[k] = data[n] = restored;
+    }
+  }
+  k = (k == delta) ? 0 : k;
+  MyMemCpy(state, hist + k, delta - k);
+  MyMemCpy(state + delta - k, hist, k);
+}
diff --git a/libraries/lzma/C/Delta.h b/libraries/lzma/C/Delta.h
new file mode 100644
index 000000000..2fa54ad67
--- /dev/null
+++ b/libraries/lzma/C/Delta.h
@@ -0,0 +1,19 @@
+/* Delta.h -- Delta converter
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __DELTA_H
+#define __DELTA_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define DELTA_STATE_SIZE 256  /* bytes in a state buffer; callers must keep delta <= this, the functions do not check */
+/* Delta_Init zeroes state; Encode/Decode transform data[0..size) in place and update state. */
+void Delta_Init(Byte *state);
+void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size);
+void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size);
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/C/LzFind.c b/libraries/lzma/C/LzFind.c
new file mode 100644
index 000000000..df55e86c1
--- /dev/null
+++ b/libraries/lzma/C/LzFind.c
@@ -0,0 +1,1127 @@
+/* LzFind.c -- Match finder for LZ algorithms
+2018-07-08 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+#include "LzFind.h"
+#include "LzHash.h"
+
+#define kEmptyHashValue 0  /* marks a hash/son slot that holds no position */
+#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)  /* pos value at which MatchFinder_CheckLimits normalizes */
+#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
+#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1))  /* rounds the normalization amount down to a step multiple */
+#define kMaxHistorySize ((UInt32)7 << 29)  /* MatchFinder_Create rejects a larger historySize */
+
+#define kStartMaxLen 3
+
+/* Releases the window buffer and clears bufferBase; does nothing in direct-input mode. */
+static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+  if (p->directInput)
+    return;
+  ISzAlloc_Free(alloc, p->bufferBase);
+  p->bufferBase = NULL;
+}
+
+/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G */
+
+/* Sets blockSize and makes sure a window buffer of that size exists; returns 1 on success, 0 if allocation failed. */
+static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc)
+{
+  const UInt32 total = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
+  if (!p->directInput)
+  {
+    if (p->bufferBase && p->blockSize == total)
+      return 1;  /* the existing buffer already has the right size */
+    LzInWindow_Free(p, alloc);
+    p->blockSize = total;
+    p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)total);
+    return (p->bufferBase != NULL);
+  }
+  p->blockSize = total;  /* direct input: only the size is recorded, no buffer is allocated */
+  return 1;
+}
+
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } /* address of the byte at the current position */
+
+UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } /* bytes from the current position up to streamPos */
+
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
+{ /* Lowers the three position counters by subValue; MatchFinder_Normalize calls this after rebasing the tables. */
+  p->posLimit -= subValue;
+  p->pos -= subValue;
+  p->streamPos -= subValue;
+}
+
+static void MatchFinder_ReadBlock(CMatchFinder *p)
+{ /* Makes more input visible past streamPos; does nothing after end of input or once p->result holds an error. */
+  if (p->streamEndWasReached || p->result != SZ_OK)
+    return;
+
+  /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */
+  /* Direct-input mode: nothing is copied, only the counters advance; (streamPos - pos) is kept from exceeding 0xFFFFFFFF. */
+  if (p->directInput)
+  {
+    UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos);
+    if (curSize > p->directInputRem)
+      curSize = (UInt32)p->directInputRem;
+    p->directInputRem -= curSize;
+    p->streamPos += curSize;
+    if (p->directInputRem == 0)
+      p->streamEndWasReached = 1;
+    return;
+  }
+  /* Stream mode: keep reading into the free tail of the block until more than keepSizeAfter bytes are unread. */
+  for (;;)
+  {
+    Byte *dest = p->buffer + (p->streamPos - p->pos);
+    size_t size = (p->bufferBase + p->blockSize - dest);
+    if (size == 0)
+      return; /* the block is full */
+
+    p->result = ISeqInStream_Read(p->stream, dest, &size);
+    if (p->result != SZ_OK)
+      return;
+    if (size == 0)
+    {
+      p->streamEndWasReached = 1; /* a successful zero-byte read is treated as end of stream */
+      return;
+    }
+    p->streamPos += (UInt32)size;
+    if (p->streamPos - p->pos > p->keepSizeAfter)
+      return;
+  }
+}
+
+/* Slides the keepSizeBefore bytes before the current position plus all unread bytes to the start of the block. */
+void MatchFinder_MoveBlock(CMatchFinder *p)
+{
+  const size_t keep = (size_t)(p->streamPos - p->pos) + p->keepSizeBefore;
+  memmove(p->bufferBase, p->buffer - p->keepSizeBefore, keep);
+  p->buffer = p->bufferBase + p->keepSizeBefore;
+}
+
+/* Returns 1 when at most keepSizeAfter bytes lie between the current position and the block end. */
+int MatchFinder_NeedMove(CMatchFinder *p)
+{
+  /* direct-input mode never moves data */
+  return !p->directInput
+      && (size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter;
+}
+
+/* Reads more input when at most keepSizeAfter bytes are unread and the stream has not ended. */
+void MatchFinder_ReadIfRequired(CMatchFinder *p)
+{
+  if (!p->streamEndWasReached
+      && p->streamPos - p->pos <= p->keepSizeAfter)
+    MatchFinder_ReadBlock(p);
+}
+
+static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
+{ /* Compacts the window first when MatchFinder_NeedMove says so, then reads more input. */
+  if (MatchFinder_NeedMove(p))
+    MatchFinder_MoveBlock(p);
+  MatchFinder_ReadBlock(p);
+}
+
+static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
+{ /* Defaults applied by MatchFinder_Construct. */
+  p->cutValue = 32;    /* passed to the search routines as their step budget (cutValue) */
+  p->btMode = 1;       /* nonzero selects the Bt* (binary tree) finders in MatchFinder_CreateVTable */
+  p->numHashBytes = 4;
+  p->bigHash = 0;
+}
+
+#define kCrcPoly 0xEDB88320  /* polynomial used by MatchFinder_Construct to build the p->crc table */
+
+/* Puts p into an empty state (no buffers, default settings) and builds the 256-entry p->crc table from kCrcPoly. */
+void MatchFinder_Construct(CMatchFinder *p)
+{
+  unsigned n;
+  p->hash = NULL;
+  p->bufferBase = NULL;
+  p->directInput = 0;
+  p->expectedDataSize = (UInt64)(Int64)-1;
+  MatchFinder_SetDefaultSettings(p);
+  for (n = 0; n != 256; n++)
+  {
+    UInt32 v = (UInt32)n;
+    unsigned bit;
+    for (bit = 8; bit != 0; bit--)
+      v = (v >> 1) ^ (kCrcPoly & ((UInt32)0 - (v & 1)));
+    p->crc[n] = v;
+  }
+}
+
+static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
+{ /* Frees the hash allocation; p->son points into the same block (see MatchFinder_Create) and is left untouched here. */
+  ISzAlloc_Free(alloc, p->hash);
+  p->hash = NULL;
+}
+
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{ /* Releases both allocations owned by the finder: the hash/son array and the input window. */
+  MatchFinder_FreeThisClassMemory(p, alloc);
+  LzInWindow_Free(p, alloc);
+}
+
+/* Allocates num CLzRef items; returns NULL if the byte count overflows size_t or the allocator fails. */
+static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
+{
+  const size_t bytes = (size_t)num * sizeof(CLzRef);
+  return (bytes / sizeof(CLzRef) == num)
+      ? (CLzRef *)ISzAlloc_Alloc(alloc, bytes) : NULL;
+}
+
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+    UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+    ISzAllocPtr alloc)
+{
+  UInt32 sizeReserv;
+  /* Returns 1 on success; on any failure everything owned by p is freed and 0 is returned. */
+  if (historySize > kMaxHistorySize)
+  {
+    MatchFinder_Free(p, alloc);
+    return 0;
+  }
+  /* Reserve space: historySize / 2, reduced to / 4 from 2 GiB and to / 8 from 3 GiB ... */
+  sizeReserv = historySize >> 1;
+       if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3;
+  else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2;
+  /* ... plus half of the additional buffers and a fixed (1 << 19) bytes. */
+  sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
+
+  p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
+  p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
+  
+  /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
+  
+  if (LzInWindow_Create(p, sizeReserv, alloc))
+  {
+    UInt32 newCyclicBufferSize = historySize + 1;
+    UInt32 hs;
+    p->matchMaxLen = matchMaxLen;
+    { /* choose the main hash mask (hs) and the total size of the fixed low tables */
+      p->fixedHashSize = 0;
+      if (p->numHashBytes == 2)
+        hs = (1 << 16) - 1;
+      else
+      {
+        hs = historySize;
+        if (hs > p->expectedDataSize)
+          hs = (UInt32)p->expectedDataSize;
+        if (hs != 0)
+          hs--;
+        hs |= (hs >> 1); /* these four steps copy every set bit into the 15 bits below it */
+        hs |= (hs >> 2);
+        hs |= (hs >> 4);
+        hs |= (hs >> 8);
+        hs >>= 1;
+        hs |= 0xFFFF; /* don't change it! It's required for Deflate */
+        if (hs > (1 << 24))
+        {
+          if (p->numHashBytes == 3)
+            hs = (1 << 24) - 1;
+          else
+            hs >>= 1;
+          /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
+        }
+      }
+      p->hashMask = hs;
+      hs++; /* from here on hs is an entry count, not a mask */
+      if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
+      if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
+      if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
+      hs += p->fixedHashSize;
+    }
+
+    { /* size and (re)allocate the single array that holds the hash tables followed by son */
+      size_t newSize;
+      size_t numSons;
+      p->historySize = historySize;
+      p->hashSizeSum = hs;
+      p->cyclicBufferSize = newCyclicBufferSize;
+      
+      numSons = newCyclicBufferSize;
+      if (p->btMode)
+        numSons <<= 1; /* binary-tree mode keeps two entries per position */
+      newSize = hs + numSons;
+      /* same element count as the current allocation: keep it */
+      if (p->hash && p->numRefs == newSize)
+        return 1;
+      
+      MatchFinder_FreeThisClassMemory(p, alloc);
+      p->numRefs = newSize;
+      p->hash = AllocRefs(newSize, alloc);
+      
+      if (p->hash)
+      {
+        p->son = p->hash + p->hashSizeSum;
+        return 1;
+      }
+    }
+  }
+  /* reached only when the window or the hash/son allocation failed */
+  MatchFinder_Free(p, alloc);
+  return 0;
+}
+
+static void MatchFinder_SetLimits(CMatchFinder *p)
+{ /* Recomputes posLimit (the pos value at which MOVE_POS calls MatchFinder_CheckLimits) and lenLimit. */
+  UInt32 limit = kMaxValForNormalize - p->pos;
+  UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
+  /* limit = steps until the nearest of: pos reaching kMaxValForNormalize, the cyclic index wrapping, ... */
+  if (limit2 < limit)
+    limit = limit2;
+  limit2 = p->streamPos - p->pos;
+  /* ... and the unread span shrinking to keepSizeAfter (one step at a time once it is already that small). */
+  if (limit2 <= p->keepSizeAfter)
+  {
+    if (limit2 > 0)
+      limit2 = 1;
+  }
+  else
+    limit2 -= p->keepSizeAfter;
+  
+  if (limit2 < limit)
+    limit = limit2;
+  
+  {
+    UInt32 lenLimit = p->streamPos - p->pos; /* lenLimit = min(available bytes, matchMaxLen) */
+    if (lenLimit > p->matchMaxLen)
+      lenLimit = p->matchMaxLen;
+    p->lenLimit = lenLimit;
+  }
+  p->posLimit = p->pos + limit;
+}
+
+
+/* Empties the first fixedHashSize entries of p->hash (the fixed-size low tables). */
+void MatchFinder_Init_LowHash(CMatchFinder *p)
+{
+  CLzRef *slot = p->hash;
+  size_t left = p->fixedHashSize;
+  for (; left != 0; left--)
+    *slot++ = kEmptyHashValue;
+}
+
+
+/* Empties the (hashMask + 1) entries of the main hash table, which starts right after the fixed tables. */
+void MatchFinder_Init_HighHash(CMatchFinder *p)
+{
+  CLzRef *slot = p->hash + p->fixedHashSize;
+  size_t left = (size_t)p->hashMask + 1;
+  while (left-- != 0)
+    *slot++ = kEmptyHashValue;
+}
+
+
+/* Rewinds to the buffer start, sets pos = streamPos = cyclicBufferSize, clears status, optionally reads, then sets limits. */
+void MatchFinder_Init_3(CMatchFinder *p, int readData)
+{
+  const UInt32 startPos = p->cyclicBufferSize;
+  p->buffer = p->bufferBase;
+  p->cyclicBufferPos = 0;
+  p->streamPos = startPos;
+  p->pos = startPos;
+  p->streamEndWasReached = 0;
+  p->result = SZ_OK;
+  if (readData)
+    MatchFinder_ReadBlock(p);
+  MatchFinder_SetLimits(p);
+}
+
+
+void MatchFinder_Init(CMatchFinder *p)
+{ /* Full reset: empties both hash regions, then resets positions and reads the first data (readData = True). */
+  MatchFinder_Init_HighHash(p);
+  MatchFinder_Init_LowHash(p);
+  MatchFinder_Init_3(p, True);
+}
+
+  
+static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
+{ /* Amount to subtract during normalization: (pos - historySize - 1) rounded down to a multiple of kNormalizeStepMin. */
+  return (p->pos - p->historySize - 1) & kNormalizeMask;
+}
+
+/* Rebases numItems stored positions by subValue: a value that is not greater than
+   subValue becomes kEmptyHashValue, every other value is reduced by subValue.
+   The table is rewritten in place. */
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+{
+  size_t left;
+  for (left = numItems; left != 0; left--, items++)
+  {
+    const UInt32 old = *items;
+    /* old <= subValue also covers slots that are already empty (0) */
+    *items = (old > subValue) ? (old - subValue) : kEmptyHashValue;
+  }
+}
+
+static void MatchFinder_Normalize(CMatchFinder *p)
+{ /* Rebases all numRefs table entries (hash tables and son) and then the finder's own position counters. */
+  UInt32 subValue = MatchFinder_GetSubValue(p);
+  MatchFinder_Normalize3(subValue, p->hash, p->numRefs);
+  MatchFinder_ReduceOffsets(p, subValue);
+}
+
+
+MY_NO_INLINE
+static void MatchFinder_CheckLimits(CMatchFinder *p)
+{ /* Called from MOVE_POS when pos reaches posLimit; handles whichever limit was hit, then recomputes the limits. */
+  if (p->pos == kMaxValForNormalize)
+    MatchFinder_Normalize(p); /* the position counter reached its maximum: rebase everything */
+  if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
+    MatchFinder_CheckAndMoveAndRead(p); /* unread span is down to keepSizeAfter: refill */
+  if (p->cyclicBufferPos == p->cyclicBufferSize)
+    p->cyclicBufferPos = 0; /* wrap the cyclic index */
+  MatchFinder_SetLimits(p);
+}
+
+
+/*
+  (lenLimit > maxLen)
+*/
+MY_FORCE_INLINE
+static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+    UInt32 *distances, unsigned maxLen)
+{ /* Hash-chain search: follows son[] links from curMatch and appends a (length, distance - 1) pair for every match longer than maxLen; returns the new end of distances. */
+  /*
+  son[_cyclicBufferPos] = curMatch;
+  for (;;)
+  {
+    UInt32 delta = pos - curMatch;
+    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+      return distances;
+    {
+      const Byte *pb = cur - delta;
+      curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+      if (pb[maxLen] == cur[maxLen] && *pb == *cur)
+      {
+        UInt32 len = 0;
+        while (++len != lenLimit)
+          if (pb[len] != cur[len])
+            break;
+        if (maxLen < len)
+        {
+          maxLen = len;
+          *distances++ = len;
+          *distances++ = delta - 1;
+          if (len == lenLimit)
+            return distances;
+        }
+      }
+    }
+  }
+  */
+  /* Live implementation; NOTE(review): the commented-out loop above appears to be a simpler form of the same search - confirm. */
+  const Byte *lim = cur + lenLimit;
+  son[_cyclicBufferPos] = curMatch; /* link the current position to the previous chain head */
+  do
+  {
+    UInt32 delta = pos - curMatch;
+    if (delta >= _cyclicBufferSize)
+      break; /* candidate lies outside the cyclic buffer */
+    {
+      ptrdiff_t diff;
+      curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+      diff = (ptrdiff_t)0 - delta;
+      if (cur[maxLen] == cur[maxLen + diff]) /* quick reject: a longer match must agree at index maxLen */
+      {
+        const Byte *c = cur;
+        while (*c == c[diff])
+        {
+          if (++c == lim)
+          {
+            distances[0] = (UInt32)(lim - cur); /* full lenLimit match: record it and stop */
+            distances[1] = delta - 1;
+            return distances + 2;
+          }
+        }
+        {
+          unsigned len = (unsigned)(c - cur);
+          if (maxLen < len)
+          {
+            maxLen = len;
+            distances[0] = (UInt32)len;
+            distances[1] = delta - 1;
+            distances += 2;
+          }
+        }
+      }
+    }
+  }
+  while (--cutValue); /* at most cutValue candidates are examined */
+  
+  return distances;
+}
+
+
+MY_FORCE_INLINE
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+    UInt32 *distances, UInt32 maxLen)
+{ /* Binary-tree search + insert: descends from curMatch, appends a (length, distance - 1) pair for every match longer than maxLen, and re-links nodes under the two son slots of the current position; returns the new end of distances. */
+  CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+  CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+  unsigned len0 = 0, len1 = 0;
+  for (;;)
+  {
+    UInt32 delta = pos - curMatch;
+    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+    {
+      *ptr0 = *ptr1 = kEmptyHashValue; /* budget spent or candidate out of range: close both open links */
+      return distances;
+    }
+    {
+      CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+      const Byte *pb = cur - delta;
+      unsigned len = (len0 < len1 ? len0 : len1); /* start comparing at the smaller of the two lengths recorded so far */
+      UInt32 pair0 = pair[0];
+      if (pb[len] == cur[len])
+      {
+        if (++len != lenLimit && pb[len] == cur[len])
+          while (++len != lenLimit)
+            if (pb[len] != cur[len])
+              break;
+        if (maxLen < len)
+        {
+          maxLen = (UInt32)len;
+          *distances++ = (UInt32)len;
+          *distances++ = delta - 1;
+          if (len == lenLimit)
+          {
+            *ptr1 = pair0; /* full-length match: take over the candidate's two links and stop */
+            *ptr0 = pair[1];
+            return distances;
+          }
+        }
+      }
+      if (pb[len] < cur[len])
+      {
+        *ptr1 = curMatch; /* candidate's first differing byte is smaller: continue through pair[1] */
+        ptr1 = pair + 1;
+        curMatch = *ptr1;
+        len1 = len;
+      }
+      else
+      {
+        *ptr0 = curMatch; /* otherwise continue through pair[0] */
+        ptr0 = pair;
+        curMatch = *ptr0;
+        len0 = len;
+      }
+    }
+  }
+}
+
+static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
+{ /* Same tree descent and re-linking as GetMatchesSpec1, but no matches are reported. */
+  CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+  CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+  unsigned len0 = 0, len1 = 0;
+  for (;;)
+  {
+    UInt32 delta = pos - curMatch;
+    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+    {
+      *ptr0 = *ptr1 = kEmptyHashValue; /* budget spent or candidate out of range: close both open links */
+      return;
+    }
+    {
+      CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+      const Byte *pb = cur - delta;
+      unsigned len = (len0 < len1 ? len0 : len1); /* start comparing at the smaller of the two lengths recorded so far */
+      if (pb[len] == cur[len])
+      {
+        while (++len != lenLimit)
+          if (pb[len] != cur[len])
+            break;
+        {
+          if (len == lenLimit)
+          {
+            *ptr1 = pair[0]; /* full-length match: take over the candidate's two links and stop */
+            *ptr0 = pair[1];
+            return;
+          }
+        }
+      }
+      if (pb[len] < cur[len])
+      {
+        *ptr1 = curMatch;
+        ptr1 = pair + 1;
+        curMatch = *ptr1;
+        len1 = len;
+      }
+      else
+      {
+        *ptr0 = curMatch;
+        ptr0 = pair;
+        curMatch = *ptr0;
+        len0 = len;
+      }
+    }
+  }
+}
+
+#define MOVE_POS \
+  ++p->cyclicBufferPos; \
+  p->buffer++; \
+  if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
+/* MOVE_POS advances the finder `p` by one byte; it expands to several statements, so it is not safe as an unbraced if/else body. */
+#define MOVE_POS_RET MOVE_POS return (UInt32)offset;
+
+static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
+/* GET_MATCHES_HEADER2 declares the locals every finder uses; when fewer than minLen bytes are usable it only steps forward and runs ret_op. */
+#define GET_MATCHES_HEADER2(minLen, ret_op) \
+  unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
+  lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
+  cur = p->buffer;
+
+#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
+#define SKIP_HEADER(minLen)        GET_MATCHES_HEADER2(minLen, continue)
+/* MF_PARAMS expands to the six state arguments shared by GetMatchesSpec1, SkipMatchesSpec and Hc_GetMatchesSpec. */
+#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+/* GET_MATCHES_FOOTER runs the tree search, turns the returned pointer into a count of written UInt32 values, steps and returns it. */
+#define GET_MATCHES_FOOTER(offset, maxLen) \
+  offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
+  distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET;
+
+#define SKIP_FOOTER \
+  SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
+/* UPDATE_maxLen extends maxLen while cur[i] equals the byte d2 positions back, stopping at lenLimit; uses the locals cur, d2, maxLen, lenLimit. */
+#define UPDATE_maxLen { \
+    ptrdiff_t diff = (ptrdiff_t)0 - d2; \
+    const Byte *c = cur + maxLen; \
+    const Byte *lim = cur + lenLimit; \
+    for (; c != lim; c++) if (*(c + diff) != *c) break; \
+    maxLen = (unsigned)(c - cur); }
+
+static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{ /* Binary-tree finder with one hash table; returns the number of UInt32 values written to distances as (length, distance - 1) pairs. */
+  unsigned offset;
+  GET_MATCHES_HEADER(2)
+  HASH2_CALC; /* NOTE(review): macro from LzHash.h, not visible here; presumably sets hv from the bytes at cur - confirm */
+  curMatch = p->hash[hv];
+  p->hash[hv] = p->pos; /* the current position becomes the newest entry of its hash slot */
+  offset = 0;
+  GET_MATCHES_FOOTER(offset, 1)
+}
+
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{ /* Binary-tree finder using HASH_ZIP_CALC; needs at least 3 usable bytes and only reports matches longer than 2. */
+  unsigned offset;
+  GET_MATCHES_HEADER(3)
+  HASH_ZIP_CALC; /* NOTE(review): macro from LzHash.h, not visible here; presumably sets hv from the bytes at cur - confirm */
+  curMatch = p->hash[hv];
+  p->hash[hv] = p->pos;
+  offset = 0;
+  GET_MATCHES_FOOTER(offset, 2)
+}
+
+static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{ /* Binary-tree finder with a 2-byte helper table in front of the main table; returns the number of UInt32 values written to distances. */
+  UInt32 h2, d2, pos;
+  unsigned maxLen, offset;
+  UInt32 *hash;
+  GET_MATCHES_HEADER(3)
+
+  HASH3_CALC; /* NOTE(review): macro from LzHash.h, not visible here; presumably sets h2 and hv from cur - confirm */
+
+  hash = p->hash;
+  pos = p->pos;
+
+  d2 = pos - hash[h2]; /* distance back to the position last stored in slot h2 */
+
+  curMatch = (hash + kFix3HashSize)[hv];
+  
+  hash[h2] = pos;
+  (hash + kFix3HashSize)[hv] = pos;
+
+  maxLen = 2;
+  offset = 0;
+
+  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+  {
+    UPDATE_maxLen
+    distances[0] = (UInt32)maxLen;
+    distances[1] = d2 - 1;
+    offset = 2;
+    if (maxLen == lenLimit)
+    {
+      SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); /* already at lenLimit: only update the tree */
+      MOVE_POS_RET;
+    }
+  }
+  
+  GET_MATCHES_FOOTER(offset, maxLen)
+}
+
+static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{ /* Binary-tree finder with 2- and 3-byte helper tables in front of the main table; returns the number of UInt32 values written to distances. */
+  UInt32 h2, h3, d2, d3, pos;
+  unsigned maxLen, offset;
+  UInt32 *hash;
+  GET_MATCHES_HEADER(4)
+
+  HASH4_CALC; /* NOTE(review): macro from LzHash.h, not visible here; presumably sets h2, h3 and hv from cur - confirm */
+
+  hash = p->hash;
+  pos = p->pos;
+
+  d2 = pos - hash                  [h2];
+  d3 = pos - (hash + kFix3HashSize)[h3];
+
+  curMatch = (hash + kFix4HashSize)[hv];
+
+  hash                  [h2] = pos;
+  (hash + kFix3HashSize)[h3] = pos;
+  (hash + kFix4HashSize)[hv] = pos;
+
+  maxLen = 0;
+  offset = 0;
+  
+  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+  {
+    maxLen = 2;
+    distances[0] = 2;
+    distances[1] = d2 - 1;
+    offset = 2;
+  }
+  
+  if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+  {
+    maxLen = 3;
+    distances[(size_t)offset + 1] = d3 - 1; /* the length slot of this pair is filled in below */
+    offset += 2;
+    d2 = d3; /* UPDATE_maxLen works on d2, so point it at the 3-byte candidate */
+  }
+  
+  if (offset != 0)
+  {
+    UPDATE_maxLen
+    distances[(size_t)offset - 2] = (UInt32)maxLen; /* store the extended length for the last pair */
+    if (maxLen == lenLimit)
+    {
+      SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); /* already at lenLimit: only update the tree */
+      MOVE_POS_RET;
+    }
+  }
+  
+  if (maxLen < 3)
+    maxLen = 3; /* the tree search then only reports matches longer than 3 */
+  
+  GET_MATCHES_FOOTER(offset, maxLen)
+}
+
+/*
+static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos;
+  UInt32 *hash;
+  GET_MATCHES_HEADER(5)
+
+  HASH5_CALC;
+
+  hash = p->hash;
+  pos = p->pos;
+
+  d2 = pos - hash                  [h2];
+  d3 = pos - (hash + kFix3HashSize)[h3];
+  d4 = pos - (hash + kFix4HashSize)[h4];
+
+  curMatch = (hash + kFix5HashSize)[hv];
+
+  hash                  [h2] = pos;
+  (hash + kFix3HashSize)[h3] = pos;
+  (hash + kFix4HashSize)[h4] = pos;
+  (hash + kFix5HashSize)[hv] = pos;
+
+  maxLen = 0;
+  offset = 0;
+
+  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+  {
+    distances[0] = maxLen = 2;
+    distances[1] = d2 - 1;
+    offset = 2;
+    if (*(cur - d2 + 2) == cur[2])
+      distances[0] = maxLen = 3;
+    else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+    {
+      distances[2] = maxLen = 3;
+      distances[3] = d3 - 1;
+      offset = 4;
+      d2 = d3;
+    }
+  }
+  else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+  {
+    distances[0] = maxLen = 3;
+    distances[1] = d3 - 1;
+    offset = 2;
+    d2 = d3;
+  }
+  
+  if (d2 != d4 && d4 < p->cyclicBufferSize
+      && *(cur - d4) == *cur
+      && *(cur - d4 + 3) == *(cur + 3))
+  {
+    maxLen = 4;
+    distances[(size_t)offset + 1] = d4 - 1;
+    offset += 2;
+    d2 = d4;
+  }
+  
+  if (offset != 0)
+  {
+    UPDATE_maxLen
+    distances[(size_t)offset - 2] = maxLen;
+    if (maxLen == lenLimit)
+    {
+      SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
+      MOVE_POS_RET;
+    }
+  }
+
+  if (maxLen < 4)
+    maxLen = 4;
+  
+  GET_MATCHES_FOOTER(offset, maxLen)
+}
+*/
+
+static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{ /* Hash-chain finder with 2- and 3-byte helper tables in front of the main table; returns the number of UInt32 values written to distances. */
+  UInt32 h2, h3, d2, d3, pos;
+  unsigned maxLen, offset;
+  UInt32 *hash;
+  GET_MATCHES_HEADER(4)
+
+  HASH4_CALC; /* NOTE(review): macro from LzHash.h, not visible here; presumably sets h2, h3 and hv from cur - confirm */
+
+  hash = p->hash;
+  pos = p->pos;
+  
+  d2 = pos - hash                  [h2];
+  d3 = pos - (hash + kFix3HashSize)[h3];
+  curMatch = (hash + kFix4HashSize)[hv];
+
+  hash                  [h2] = pos;
+  (hash + kFix3HashSize)[h3] = pos;
+  (hash + kFix4HashSize)[hv] = pos;
+
+  maxLen = 0;
+  offset = 0;
+
+  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+  {
+    maxLen = 2;
+    distances[0] = 2;
+    distances[1] = d2 - 1;
+    offset = 2;
+  }
+  
+  if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+  {
+    maxLen = 3;
+    distances[(size_t)offset + 1] = d3 - 1; /* the length slot of this pair is filled in below */
+    offset += 2;
+    d2 = d3; /* UPDATE_maxLen works on d2, so point it at the 3-byte candidate */
+  }
+  
+  if (offset != 0)
+  {
+    UPDATE_maxLen
+    distances[(size_t)offset - 2] = (UInt32)maxLen; /* store the extended length for the last pair */
+    if (maxLen == lenLimit)
+    {
+      p->son[p->cyclicBufferPos] = curMatch; /* already at lenLimit: just link the chain and step */
+      MOVE_POS_RET;
+    }
+  }
+  
+  if (maxLen < 3)
+    maxLen = 3; /* the chain search then only reports matches longer than 3 */
+
+  offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+      distances + offset, maxLen) - (distances));
+  MOVE_POS_RET
+}
+
+/*
+static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos
+  UInt32 *hash;
+  GET_MATCHES_HEADER(5)
+
+  HASH5_CALC;
+
+  hash = p->hash;
+  pos = p->pos;
+  
+  d2 = pos - hash                  [h2];
+  d3 = pos - (hash + kFix3HashSize)[h3];
+  d4 = pos - (hash + kFix4HashSize)[h4];
+
+  curMatch = (hash + kFix5HashSize)[hv];
+
+  hash                  [h2] = pos;
+  (hash + kFix3HashSize)[h3] = pos;
+  (hash + kFix4HashSize)[h4] = pos;
+  (hash + kFix5HashSize)[hv] = pos;
+
+  maxLen = 0;
+  offset = 0;
+
+  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+  {
+    distances[0] = maxLen = 2;
+    distances[1] = d2 - 1;
+    offset = 2;
+    if (*(cur - d2 + 2) == cur[2])
+      distances[0] = maxLen = 3;
+    else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+    {
+      distances[2] = maxLen = 3;
+      distances[3] = d3 - 1;
+      offset = 4;
+      d2 = d3;
+    }
+  }
+  else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+  {
+    distances[0] = maxLen = 3;
+    distances[1] = d3 - 1;
+    offset = 2;
+    d2 = d3;
+  }
+  
+  if (d2 != d4 && d4 < p->cyclicBufferSize
+      && *(cur - d4) == *cur
+      && *(cur - d4 + 3) == *(cur + 3))
+  {
+    maxLen = 4;
+    distances[(size_t)offset + 1] = d4 - 1;
+    offset += 2;
+    d2 = d4;
+  }
+  
+  if (offset != 0)
+  {
+    UPDATE_maxLen
+    distances[(size_t)offset - 2] = maxLen;
+    if (maxLen == lenLimit)
+    {
+      p->son[p->cyclicBufferPos] = curMatch;
+      MOVE_POS_RET;
+    }
+  }
+  
+  if (maxLen < 4)
+    maxLen = 4;
+
+  offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+      distances + offset, maxLen) - (distances));
+  MOVE_POS_RET
+}
+*/
+
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{ /* Hash-chain finder using HASH_ZIP_CALC; needs at least 3 usable bytes and only reports matches longer than 2. */
+  unsigned offset;
+  GET_MATCHES_HEADER(3)
+  HASH_ZIP_CALC; /* NOTE(review): macro from LzHash.h, not visible here; presumably sets hv from the bytes at cur - confirm */
+  curMatch = p->hash[hv];
+  p->hash[hv] = p->pos;
+  offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+      distances, 2) - (distances));
+  MOVE_POS_RET
+}
+
+static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{ /* Advances num positions, updating the hash table and tree but reporting no matches; the do/while runs at least once, so num must not be 0. */
+  do
+  {
+    SKIP_HEADER(2)
+    HASH2_CALC;
+    curMatch = p->hash[hv];
+    p->hash[hv] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{ /* Advances num positions, updating the hash table and tree but reporting no matches; the do/while runs at least once, so num must not be 0. */
+  do
+  {
+    SKIP_HEADER(3)
+    HASH_ZIP_CALC;
+    curMatch = p->hash[hv];
+    p->hash[hv] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{ /* Advances num positions, updating both hash tables and the tree but reporting no matches; num must not be 0. */
+  do
+  {
+    UInt32 h2;
+    UInt32 *hash;
+    SKIP_HEADER(3)
+    HASH3_CALC;
+    hash = p->hash;
+    curMatch = (hash + kFix3HashSize)[hv]; /* read the old head before it is overwritten below */
+    hash[h2] =
+    (hash + kFix3HashSize)[hv] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{ /* Advances num positions, updating all three hash tables and the tree but reporting no matches; num must not be 0. */
+  do
+  {
+    UInt32 h2, h3;
+    UInt32 *hash;
+    SKIP_HEADER(4)
+    HASH4_CALC;
+    hash = p->hash;
+    curMatch = (hash + kFix4HashSize)[hv]; /* read the old head before it is overwritten below */
+    hash                  [h2] =
+    (hash + kFix3HashSize)[h3] =
+    (hash + kFix4HashSize)[hv] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+/*
+static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    UInt32 h2, h3, h4;
+    UInt32 *hash;
+    SKIP_HEADER(5)
+    HASH5_CALC;
+    hash = p->hash;
+    curMatch = (hash + kFix5HashSize)[hv];
+    hash                  [h2] =
+    (hash + kFix3HashSize)[h3] =
+    (hash + kFix4HashSize)[h4] =
+    (hash + kFix5HashSize)[hv] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+*/
+
+static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{ /* Advances num positions, updating all three hash tables and the chain but reporting no matches; num must not be 0. */
+  do
+  {
+    UInt32 h2, h3;
+    UInt32 *hash;
+    SKIP_HEADER(4)
+    HASH4_CALC;
+    hash = p->hash;
+    curMatch = (hash + kFix4HashSize)[hv]; /* read the old head before it is overwritten below */
+    hash                  [h2] =
+    (hash + kFix3HashSize)[h3] =
+    (hash + kFix4HashSize)[hv] = p->pos;
+    p->son[p->cyclicBufferPos] = curMatch; /* chain the current position to the previous head */
+    MOVE_POS
+  }
+  while (--num != 0);
+}
+
+/*
+static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    UInt32 h2, h3, h4;
+    UInt32 *hash;
+    SKIP_HEADER(5)
+    HASH5_CALC;
+    hash = p->hash;
+    curMatch = hash + kFix5HashSize)[hv];
+    hash                  [h2] =
+    (hash + kFix3HashSize)[h3] =
+    (hash + kFix4HashSize)[h4] =
+    (hash + kFix5HashSize)[hv] = p->pos;
+    p->son[p->cyclicBufferPos] = curMatch;
+    MOVE_POS
+  }
+  while (--num != 0);
+}
+*/
+
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{ /* Advances num positions, updating the hash table and the chain but reporting no matches; num must not be 0. */
+  do
+  {
+    SKIP_HEADER(3)
+    HASH_ZIP_CALC;
+    curMatch = p->hash[hv];
+    p->hash[hv] = p->pos;
+    p->son[p->cyclicBufferPos] = curMatch; /* chain the current position to the previous head */
+    MOVE_POS
+  }
+  while (--num != 0);
+}
+
+/* Fills vTable for the mode configured in p.  Init, GetNumAvailableBytes and
+   GetPointerToCurrentPos never depend on the mode; GetMatches/Skip do:
+     btMode == 0                -> Hc4 pair (numHashBytes is not consulted)
+     btMode != 0, 2 hash bytes  -> Bt2 pair
+     btMode != 0, 3 hash bytes  -> Bt3 pair
+     btMode != 0, anything else -> Bt4 pair
+   The Hc5 / Bt5 variants exist only as commented-out code in this file, so
+   they are not reachable from here. */
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
+{
+  Mf_GetMatches_Func getMatches;
+  Mf_Skip_Func skip;
+
+  if (!p->btMode)
+  {
+    getMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
+    skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
+  }
+  else switch (p->numHashBytes)
+  {
+    case 2:
+      getMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
+      skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
+      break;
+    case 3:
+      getMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
+      skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
+      break;
+    default: /* every other value, including 4 */
+      getMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
+      skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
+      break;
+  }
+
+  /* mode-independent entries */
+  vTable->Init = (Mf_Init_Func)MatchFinder_Init;
+  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
+  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
+
+  /* mode-dependent entries chosen above */
+  vTable->GetMatches = getMatches;
+  vTable->Skip = skip;
+}
diff --git a/libraries/lzma/C/LzFind.h b/libraries/lzma/C/LzFind.h
new file mode 100644
index 000000000..42c13be15
--- /dev/null
+++ b/libraries/lzma/C/LzFind.h
@@ -0,0 +1,121 @@
+/* LzFind.h -- Match finder for LZ algorithms
+2017-06-10 : Igor Pavlov : Public domain */
+
+#ifndef __LZ_FIND_H
+#define __LZ_FIND_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+typedef UInt32 CLzRef; /* one hash/son table entry: a stored position value, or 0 when the slot is empty */
+
+typedef struct _CMatchFinder
+{
+  Byte *buffer;      /* address of the byte at the current position; advanced together with pos */
+  UInt32 pos;        /* position counter of the current byte; MatchFinder_Init_3 starts it at cyclicBufferSize */
+  UInt32 posLimit;   /* pos value at which MatchFinder_CheckLimits must run next */
+  UInt32 streamPos;  /* position just past the last available byte; (streamPos - pos) = bytes available */
+  UInt32 lenLimit;   /* min(bytes available, matchMaxLen), set by MatchFinder_SetLimits */
+
+  UInt32 cyclicBufferPos;  /* index of the current position inside son; wraps to 0 at cyclicBufferSize */
+  UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
+
+  Byte streamEndWasReached; /* set once a read returned 0 bytes or directInputRem reached 0 */
+  Byte btMode;              /* nonzero: binary-tree finders (two son entries per position); 0: hash-chain finder */
+  Byte bigHash;             /* cleared by the default settings; not read anywhere in LzFind.c */
+  Byte directInput;         /* nonzero: the finder does not allocate or free the window buffer and reads no stream */
+
+  UInt32 matchMaxLen; /* upper bound for reported match lengths */
+  CLzRef *hash;       /* start of the single allocation: fixed tables, main hash table, then son */
+  CLzRef *son;        /* = hash + hashSizeSum */
+  UInt32 hashMask;    /* main hash table holds hashMask + 1 entries */
+  UInt32 cutValue;    /* step budget handed to the search routines */
+
+  Byte *bufferBase;     /* start of the window block (allocated by the finder unless directInput) */
+  ISeqInStream *stream; /* input source used by MatchFinder_ReadBlock in stream mode */
+  
+  UInt32 blockSize;      /* size of the window block: keepSizeBefore + keepSizeAfter + reserve */
+  UInt32 keepSizeBefore; /* bytes kept before the current position when the block is moved */
+  UInt32 keepSizeAfter;  /* look-ahead the finder tries to keep available after the current position */
+
+  UInt32 numHashBytes;   /* selects the hash layout and, in binary-tree mode, the Bt2/Bt3/Bt4 finder */
+  size_t directInputRem; /* direct-input bytes not yet made available */
+  UInt32 historySize;    /* value passed to MatchFinder_Create */
+  UInt32 fixedHashSize;  /* total entries of the fixed tables at the start of hash */
+  UInt32 hashSizeSum;    /* fixedHashSize + hashMask + 1 */
+  SRes result;           /* last stream read status; reads stop once it is not SZ_OK */
+  UInt32 crc[256];       /* table built by MatchFinder_Construct from kCrcPoly */
+  size_t numRefs;        /* number of CLzRef entries in the hash/son allocation */
+
+  UInt64 expectedDataSize; /* caps the main hash size in MatchFinder_Create; all ones by default */
+} CMatchFinder;
+
+#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
+/* Macro forms of MatchFinder_GetPointerToCurrentPos and MatchFinder_GetNumAvailableBytes. */
+#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
+/* True when the input ended, every byte was consumed and, in direct-input mode, nothing remains. */
+#define Inline_MatchFinder_IsFinishedOK(p) \
+    ((p)->streamEndWasReached \
+        && (p)->streamPos == (p)->pos \
+        && (!(p)->directInput || (p)->directInputRem == 0))
+      
+int MatchFinder_NeedMove(CMatchFinder *p);
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
+void MatchFinder_MoveBlock(CMatchFinder *p);
+void MatchFinder_ReadIfRequired(CMatchFinder *p);
+
+void MatchFinder_Construct(CMatchFinder *p);
+
+/* Conditions:
+     historySize <= 3 GB
+     keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
+*/
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+    UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+    ISzAllocPtr alloc);
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
+    UInt32 *distances, UInt32 maxLen);
+
+/*
+Conditions:
+  Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
+  Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
+*/
+
+typedef void (*Mf_Init_Func)(void *object); /* object is the finder instance; MatchFinder_CreateVTable stores CMatchFinder functions here through casts */
+typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
+typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
+typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances); /* returns the number of UInt32 values written to distances */
+typedef void (*Mf_Skip_Func)(void *object, UInt32); /* second argument: number of positions to skip */
+
+typedef struct _IMatchFinder
+{
+  Mf_Init_Func Init;
+  Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
+  Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
+  Mf_GetMatches_Func GetMatches;
+  Mf_Skip_Func Skip;
+} IMatchFinder;
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
+
+void MatchFinder_Init_LowHash(CMatchFinder *p);
+void MatchFinder_Init_HighHash(CMatchFinder *p);
+void MatchFinder_Init_3(CMatchFinder *p, int readData);
+void MatchFinder_Init(CMatchFinder *p);
+
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/C/LzFindMt.c b/libraries/lzma/C/LzFindMt.c
new file mode 100644
index 000000000..bb0f42c30
--- /dev/null
+++ b/libraries/lzma/C/LzFindMt.c
@@ -0,0 +1,853 @@
+/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
+2018-12-29 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "LzHash.h"
+
+#include "LzFindMt.h"
+
+/* Runs the *_Construct step on each sync object of *p and clears its state flags. */
+static void MtSync_Construct(CMtSync *p)
+{
+  Thread_Construct(&p->thread);
+  Event_Construct(&p->canStart);
+  Event_Construct(&p->wasStarted);
+  Event_Construct(&p->wasStopped);
+  Semaphore_Construct(&p->freeSemaphore);
+  Semaphore_Construct(&p->filledSemaphore);
+  /* nothing has been created, and the critical section is neither set up nor held */
+  p->csWasEntered = p->csWasInitialized = p->wasCreated = False;
+}
+
+static void MtSync_GetNextBlock(CMtSync *p)
+{ /* Consumer side: returns with one filled block claimed and p->cs entered. */
+  if (p->needStart) /* first call of a run: reset the run state and start the worker */
+  {
+    p->numProcessedBlocks = 1;
+    p->needStart = False;
+    p->stopWriting = False;
+    p->exit = False;
+    Event_Reset(&p->wasStarted);
+    Event_Reset(&p->wasStopped);
+    /* wake the worker (it waits on canStart) and wait until it sets wasStarted */
+    Event_Set(&p->canStart);
+    Event_Wait(&p->wasStarted);
+
+    // if (mt) MatchFinder_Init_LowHash(mt->MatchFinder);
+  }
+  else /* later calls: hand the previously claimed block back before taking the next one */
+  {
+    CriticalSection_Leave(&p->cs);
+    p->csWasEntered = False;
+    p->numProcessedBlocks++;
+    Semaphore_Release1(&p->freeSemaphore);
+  }
+  Semaphore_Wait(&p->filledSemaphore); /* the worker releases this after each block it fills */
+  CriticalSection_Enter(&p->cs);
+  p->csWasEntered = True;
+}
+
+/* MtSync_StopWriting must be called if Writing was started */
+
+static void MtSync_StopWriting(CMtSync *p)
+{ /* Asks the worker to leave its inner loop and waits for wasStopped; no-op if no thread exists or it was never started. */
+  UInt32 myNumBlocks = p->numProcessedBlocks; /* consumer-side count, read before the worker stores its own count in the field */
+  if (!Thread_WasCreated(&p->thread) || p->needStart)
+    return;
+  p->stopWriting = True;
+  if (p->csWasEntered)
+  {
+    CriticalSection_Leave(&p->cs);
+    p->csWasEntered = False;
+  }
+  Semaphore_Release1(&p->freeSemaphore); /* presumably lets a worker waiting on freeSemaphore proceed and see stopWriting */
+ 
+  Event_Wait(&p->wasStopped);
+  /* drain: consume every block the worker filled beyond myNumBlocks and return its slot */
+  while (myNumBlocks++ != p->numProcessedBlocks)
+  {
+    Semaphore_Wait(&p->filledSemaphore);
+    Semaphore_Release1(&p->freeSemaphore);
+  }
+  p->needStart = True;
+}
+
+static void MtSync_Destruct(CMtSync *p)
+{ /* Stops and joins the worker thread (if one exists), then deletes/closes every sync object of *p. */
+  if (Thread_WasCreated(&p->thread))
+  {
+    MtSync_StopWriting(p);
+    p->exit = True;
+    if (p->needStart)
+      Event_Set(&p->canStart); /* wake the idle worker so it can observe p->exit and return */
+    Thread_Wait(&p->thread);
+    Thread_Close(&p->thread);
+  }
+  if (p->csWasInitialized)
+  {
+    CriticalSection_Delete(&p->cs);
+    p->csWasInitialized = False;
+  }
+  /* closed unconditionally: MtSync_Create also calls this after a partially failed MtSync_Create2 */
+  Event_Close(&p->canStart);
+  Event_Close(&p->wasStarted);
+  Event_Close(&p->wasStopped);
+  Semaphore_Close(&p->freeSemaphore);
+  Semaphore_Close(&p->filledSemaphore);
+  /* lets a later MtSync_Create2 build everything again instead of returning early */
+  p->wasCreated = False;
+}
+
+#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
+
+static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
+{ /* Creates the sync objects and the worker thread once; returns SZ_ERROR_THREAD at the first step that fails. */
+  if (p->wasCreated)
+    return SZ_OK;
+  /* no cleanup on the early returns below: the caller MtSync_Create runs MtSync_Destruct on failure */
+  RINOK_THREAD(CriticalSection_Init(&p->cs));
+  p->csWasInitialized = True;
+
+  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));
+  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted));
+  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));
+  /* presumably (initial count, maximum count): all numBlocks slots start free, none filled - TODO confirm argument order */
+  RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks));
+  RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks));
+
+  p->needStart = True;
+  
+  RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj));
+  p->wasCreated = True;
+  return SZ_OK;
+}
+
+/* Creates the sync objects and worker thread of *p; on failure everything already created is destroyed again. */
+static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
+{
+  const SRes status = MtSync_Create2(p, startAddress, obj, numBlocks);
+  if (status != SZ_OK) MtSync_Destruct(p); /* roll back a partial creation */
+  return status;
+}
+
+void MtSync_Init(CMtSync *p) { p->needStart = True; } /* makes the next MtSync_GetNextBlock take its start-up branch */
+
+#define kMtMaxValForNormalize 0xFFFFFFFF
+/* DEF_GetHeads2 defines GetHeads<name>: for each of numHeads positions it stores pos - hash[v] in heads and sets hash[v] = pos. */
+#define DEF_GetHeads2(name, v, action) \
+  static void GetHeads ## name(const Byte *p, UInt32 pos, \
+      UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \
+    { action; for (; numHeads != 0; numHeads--) { \
+      const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++;  } }
+/* same generator with an empty prologue statement */
+#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
+
+DEF_GetHeads2(2,  (p[0] | ((UInt32)p[1] << 8)), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
+DEF_GetHeads(3,  (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask)
+DEF_GetHeads(4,  (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask)
+DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask)
+/* DEF_GetHeads(5,  (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) */
+
+static void HashThreadFunc(CMatchFinderMt *mt)
+{ /* Hash worker: each run fills hashBuf blocks with head deltas until stopWriting; returns only when p->exit is set. */
+  CMtSync *p = &mt->hashSync;
+  for (;;)
+  {
+    UInt32 numProcessedBlocks = 0;
+    Event_Wait(&p->canStart); /* idle until MtSync_GetNextBlock (or MtSync_Destruct) sets canStart */
+    Event_Set(&p->wasStarted);
+
+    MatchFinder_Init_HighHash(mt->MatchFinder);
+
+    for (;;)
+    {
+      if (p->exit)
+        return;
+      if (p->stopWriting)
+      {
+        p->numProcessedBlocks = numProcessedBlocks; /* MtSync_StopWriting uses this count to drain filled blocks */
+        Event_Set(&p->wasStopped);
+        break;
+      }
+
+      {
+        CMatchFinder *mf = mt->MatchFinder;
+        if (MatchFinder_NeedMove(mf))
+        {
+          CriticalSection_Enter(&mt->btSync.cs); /* both sections are held while the buffer contents move */
+          CriticalSection_Enter(&mt->hashSync.cs);
+          {
+            const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);
+            ptrdiff_t offset;
+            MatchFinder_MoveBlock(mf);
+            offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);
+            mt->pointerToCurPos -= offset; /* shift the other two stages' data pointers by the same amount */
+            mt->buffer -= offset;
+          }
+          CriticalSection_Leave(&mt->btSync.cs);
+          CriticalSection_Leave(&mt->hashSync.cs);
+          continue;
+        }
+
+        Semaphore_Wait(&p->freeSemaphore); /* wait for a free slot in the hashBuf ring */
+
+        MatchFinder_ReadIfRequired(mf);
+        if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize)) /* pos is within one block of the normalize limit */
+        {
+          UInt32 subValue = (mf->pos - mf->historySize - 1);
+          MatchFinder_ReduceOffsets(mf, subValue);
+          MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
+        }
+        {
+          UInt32 *heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize;
+          UInt32 num = mf->streamPos - mf->pos;
+          heads[0] = 2; /* header word 0: UInt32 words used in this block, the two header words included */
+          heads[1] = num; /* header word 1: bytes available at block start */
+          if (num >= mf->numHashBytes)
+          {
+            num = num - mf->numHashBytes + 1; /* positions that still have numHashBytes bytes ahead */
+            if (num > kMtHashBlockSize - 2)
+              num = kMtHashBlockSize - 2;
+            mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
+            heads[0] = 2 + num;
+          }
+          mf->pos += num;
+          mf->buffer += num;
+        }
+      }
+
+      Semaphore_Release1(&p->filledSemaphore); /* publish the block to the consumer */
+    }
+  }
+}
+
+static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p)
+{ /* Claims the next filled hash block and consumes its two header words. */
+  MtSync_GetNextBlock(&p->hashSync);
+  p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize;
+  p->hashBufPosLimit += p->hashBuf[p->hashBufPos++]; /* word 0: words used in the block, header included */
+  p->hashNumAvail = p->hashBuf[p->hashBufPos++]; /* word 1: byte count stored by HashThreadFunc */
+}
+
+#define kEmptyHashValue 0
+
+#define MFMT_GM_INLINE
+
+#ifdef MFMT_GM_INLINE
+
+/*
+  we use size_t for _cyclicBufferPos instead of UInt32
+  to eliminate "movsx" BUG in old MSVC x64 compiler.
+*/
+
+MY_NO_INLINE
+static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son,
+    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
+    UInt32 *distances, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes)
+{ /* Searches up to `size` consecutive positions; per position writes [count, (len, delta - 1)...]; returns the output end, *posRes = next pos. */
+  do
+  {
+  UInt32 *_distances = ++distances; /* the skipped word receives this position's value count below */
+  UInt32 delta = *hash++; /* distance to the first candidate, taken from the hash block */
+
+  CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+  CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+  unsigned len0 = 0, len1 = 0;
+  UInt32 cutValue = _cutValue;
+  unsigned maxLen = (unsigned)_maxLen;
+
+  /*
+  if (size > 1)
+  {
+    UInt32 delta = *hash;
+    if (delta < _cyclicBufferSize)
+    {
+      UInt32 cyc1 = _cyclicBufferPos + 1;
+      CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? _cyclicBufferSize : 0)) << 1);
+      Byte b = *(cur + 1 - delta);
+      _distances[0] = pair[0];
+      _distances[1] = b;
+    }
+  }
+  */
+  if (cutValue == 0 || delta >= _cyclicBufferSize) /* no search budget, or candidate outside the cyclic buffer */
+  {
+    *ptr0 = *ptr1 = kEmptyHashValue;
+  }
+  else
+  for(;;)
+  {
+    {
+      CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1);
+      const Byte *pb = cur - delta;
+      unsigned len = (len0 < len1 ? len0 : len1); /* comparison resumes at the shorter of the two lengths recorded so far */
+      UInt32 pair0 = *pair;
+      if (pb[len] == cur[len])
+      {
+        if (++len != lenLimit && pb[len] == cur[len])
+          while (++len != lenLimit)
+            if (pb[len] != cur[len])
+              break;
+        if (maxLen < len) /* only strictly longer matches are reported */
+        {
+          maxLen = len;
+          *distances++ = (UInt32)len;
+          *distances++ = delta - 1;
+          if (len == lenLimit) /* full-length match: take over the candidate's two links and stop */
+          {
+            UInt32 pair1 = pair[1];
+            *ptr1 = pair0;
+            *ptr0 = pair1;
+            break;
+          }
+        }
+      }
+      {
+        UInt32 curMatch = pos - delta;
+        // delta = pos - *pair;
+        // delta = pos - pair[((UInt32)pb[len] - (UInt32)cur[len]) >> 31];
+        if (pb[len] < cur[len]) /* candidate sorts below the current string: link it via ptr1, continue with pair[1] */
+        {
+          delta = pos - pair[1];
+          *ptr1 = curMatch;
+          ptr1 = pair + 1;
+          len1 = len;
+        }
+        else /* otherwise link it via ptr0 and continue with pair[0] */
+        {
+          delta = pos - *pair;
+          *ptr0 = curMatch;
+          ptr0 = pair;
+          len0 = len;
+        }
+      }
+    }
+    if (--cutValue == 0 || delta >= _cyclicBufferSize) /* budget spent or next candidate out of range: close both open links */
+    {
+      *ptr0 = *ptr1 = kEmptyHashValue;
+      break;
+    }
+  }
+  pos++;
+  _cyclicBufferPos++;
+  cur++;
+  {
+    UInt32 num = (UInt32)(distances - _distances);
+    _distances[-1] = num; /* number of values written for this position */
+  }
+  }
+  while (distances < limit && --size != 0); /* stop at the output limit or after `size` positions */
+  *posRes = pos;
+  return distances;
+}
+
+#endif
+
+
+
+static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
+{ /* Fills one bt block: distances[0] = words used, distances[1] = byte count, then one match record per position. */
+  UInt32 numProcessed = 0;
+  UInt32 curPos = 2; /* the first two words are the block header */
+  UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); //  * 2
+  
+  distances[1] = p->hashNumAvail;
+  
+  while (curPos < limit)
+  {
+    if (p->hashBufPos == p->hashBufPosLimit) /* current hash block is used up */
+    {
+      MatchFinderMt_GetNextBlock_Hash(p);
+      distances[1] = numProcessed + p->hashNumAvail;
+      if (p->hashNumAvail >= p->numHashBytes)
+        continue;
+      distances[0] = curPos + p->hashNumAvail; /* fewer than numHashBytes bytes left: emit one zero count per byte and finish */
+      distances += curPos;
+      for (; p->hashNumAvail != 0; p->hashNumAvail--)
+        *distances++ = 0;
+      return;
+    }
+    {
+      UInt32 size = p->hashBufPosLimit - p->hashBufPos;
+      UInt32 lenLimit = p->matchMaxLen;
+      UInt32 pos = p->pos;
+      UInt32 cyclicBufferPos = p->cyclicBufferPos;
+      if (lenLimit >= p->hashNumAvail)
+        lenLimit = p->hashNumAvail;
+      {
+        UInt32 size2 = p->hashNumAvail - lenLimit + 1; /* positions for which lenLimit bytes are still available */
+        if (size2 < size)
+          size = size2;
+        size2 = p->cyclicBufferSize - cyclicBufferPos; /* do not run past the end of the cyclic buffer in one call */
+        if (size2 < size)
+          size = size2;
+      }
+      
+      #ifndef MFMT_GM_INLINE
+      while (curPos < limit && size-- != 0)
+      {
+        UInt32 *startDistances = distances + curPos;
+        UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
+            pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
+            startDistances + 1, p->numHashBytes - 1) - startDistances);
+        *startDistances = num - 1;
+        curPos += num;
+        cyclicBufferPos++;
+        pos++;
+        p->buffer++;
+      }
+      #else
+      {
+        UInt32 posRes;
+        curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
+            distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
+            distances + limit,
+            size, &posRes) - distances);
+        p->hashBufPos += posRes - pos; /* posRes - pos = number of positions GetMatchesSpecN processed */
+        cyclicBufferPos += posRes - pos;
+        p->buffer += posRes - pos;
+        pos = posRes;
+      }
+      #endif
+
+      numProcessed += pos - p->pos;
+      p->hashNumAvail -= pos - p->pos;
+      p->pos = pos;
+      if (cyclicBufferPos == p->cyclicBufferSize)
+        cyclicBufferPos = 0;
+      p->cyclicBufferPos = cyclicBufferPos;
+    }
+  }
+  
+  distances[0] = curPos;
+}
+
+static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
+{ /* Produces one bt block into the btBuf ring slot selected by globalBlockIndex. */
+  CMtSync *sync = &p->hashSync;
+  if (!sync->needStart) /* hash stage running: hold its section during the fill; HashThreadFunc takes the same lock to move the buffer */
+  {
+    CriticalSection_Enter(&sync->cs);
+    sync->csWasEntered = True;
+  }
+  
+  BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize);
+
+  if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize) /* pos is within one block of the normalize limit */
+  {
+    UInt32 subValue = p->pos - p->cyclicBufferSize;
+    MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
+    p->pos -= subValue;
+  }
+
+  if (!sync->needStart)
+  {
+    CriticalSection_Leave(&sync->cs);
+    sync->csWasEntered = False;
+  }
+}
+
+void BtThreadFunc(CMatchFinderMt *mt)
+{ /* Bt worker: each run fills bt blocks until stopWriting is seen; returns only when p->exit is set. */
+  CMtSync *p = &mt->btSync;
+  for (;;)
+  {
+    UInt32 blockIndex = 0;
+    Event_Wait(&p->canStart); /* idle until MtSync_GetNextBlock (or MtSync_Destruct) sets canStart */
+    Event_Set(&p->wasStarted);
+    for (;;)
+    {
+      if (p->exit)
+        return;
+      if (p->stopWriting)
+      {
+        p->numProcessedBlocks = blockIndex; /* MtSync_StopWriting uses this count to drain filled blocks */
+        MtSync_StopWriting(&mt->hashSync); /* this thread consumes the hash blocks, so it stops the hash stage as well */
+        Event_Set(&p->wasStopped);
+        break;
+      }
+      Semaphore_Wait(&p->freeSemaphore);
+      BtFillBlock(mt, blockIndex++);
+      Semaphore_Release1(&p->filledSemaphore);
+    }
+  }
+}
+
+void MatchFinderMt_Construct(CMatchFinderMt *p)
+{ /* Empty state: both sync blocks constructed, no block buffer allocated yet. */
+  MtSync_Construct(&p->btSync);
+  MtSync_Construct(&p->hashSync);
+  p->hashBuf = NULL;
+}
+
+static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
+{ /* Frees the allocation made in MatchFinderMt_Create; btBuf points into the same allocation and is not reset here. */
+  ISzAlloc_Free(alloc, p->hashBuf);
+  p->hashBuf = NULL;
+}
+
+void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
+{ /* Destroys both sync blocks (which joins their worker threads) before releasing the block buffers. */
+  MtSync_Destruct(&p->hashSync);
+  MtSync_Destruct(&p->btSync);
+  MatchFinderMt_FreeMem(p, alloc);
+}
+
+#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) /* UInt32 words in the whole hash ring */
+#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) /* UInt32 words in the whole bt ring */
+
+static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p);  return 0; }
+static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p)
+{ /* Thread entry point for the bt worker; p is the CMatchFinderMt passed to MtSync_Create. */
+  Byte allocaDummy[0x180]; /* NOTE(review): looks like a deliberate stack-offset dummy; the reason is not stated here - confirm */
+  unsigned i = 0;
+  for (i = 0; i < 16; i++)
+    allocaDummy[i] = (Byte)0;
+  if (allocaDummy[0] == 0) /* always true after the loop above; the read keeps the array in use */
+    BtThreadFunc((CMatchFinderMt *)p);
+  return 0;
+}
+
+SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
+    UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
+{
+  CMatchFinder *mf = p->MatchFinder;
+  p->historySize = historySize;
+  if (kMtBtBlockSize <= matchMaxLen * 4)
+    return SZ_ERROR_PARAM;
+  if (!p->hashBuf)
+  {
+    p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32));
+    if (!p->hashBuf)
+      return SZ_ERROR_MEM;
+    p->btBuf = p->hashBuf + kHashBufferSize;
+  }
+  keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
+  keepAddBufferAfter += kMtHashBlockSize;
+  if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
+    return SZ_ERROR_MEM;
+
+  RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks));
+  RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks));
+  return SZ_OK;
+}
+
+/* Call it after ReleaseStream / SetStream */
+static void MatchFinderMt_Init(CMatchFinderMt *p)
+{ /* Resets the block cursors and copies the base finder's fields that the bt and LZ stages read. */
+  CMatchFinder *mf = p->MatchFinder;
+  
+  p->btBufPos =
+  p->btBufPosLimit = 0;
+  p->hashBufPos =
+  p->hashBufPosLimit = 0;
+
+  /* Init without data reading. We don't want to read data in this thread */
+  MatchFinder_Init_3(mf, False);
+  MatchFinder_Init_LowHash(mf);
+  
+  p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
+  p->btNumAvailBytes = 0;
+  p->lzPos = p->historySize + 1;
+  /* fixed-size hash tables and CRC table used by MixMatches* and the Skip functions */
+  p->hash = mf->hash;
+  p->fixedHashSize = mf->fixedHashSize;
+  p->crc = mf->crc;
+  /* state read and advanced by the bt stage (BtGetMatches / BtFillBlock) */
+  p->son = mf->son;
+  p->matchMaxLen = mf->matchMaxLen;
+  p->numHashBytes = mf->numHashBytes;
+  p->pos = mf->pos;
+  p->buffer = mf->buffer;
+  p->cyclicBufferPos = mf->cyclicBufferPos;
+  p->cyclicBufferSize = mf->cyclicBufferSize;
+  p->cutValue = mf->cutValue;
+}
+
+/* ReleaseStream is required to finish multithreading */
+void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
+{ /* Stops the bt worker; BtThreadFunc in turn stops the hash worker when it sees stopWriting. */
+  MtSync_StopWriting(&p->btSync);
+  /* p->MatchFinder->ReleaseStream(); */
+}
+
+static void MatchFinderMt_Normalize(CMatchFinderMt *p)
+{ /* Restarts lzPos at historySize + 1; presumably MatchFinder_Normalize3 lowers the fixed-hash entries by the same amount. */
+  MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize);
+  p->lzPos = p->historySize + 1;
+}
+
+static void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
+{ /* Claims the next filled bt block, consumes its two header words and normalizes lzPos when it nears the limit. */
+  UInt32 blockIndex;
+  MtSync_GetNextBlock(&p->btSync);
+  blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask);
+  p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize;
+  p->btBufPosLimit += p->btBuf[p->btBufPos++]; /* word 0: words used in the block, as stored by BtGetMatches */
+  p->btNumAvailBytes = p->btBuf[p->btBufPos++]; /* word 1: byte count stored by BtGetMatches */
+  if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize)
+    MatchFinderMt_Normalize(p);
+}
+
+static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
+{ /* Returns the LZ-stage data pointer; HashThreadFunc may shift it when it moves the buffer. */
+  return p->pointerToCurPos;
+}
+
+#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
+/* GET_NEXT_BLOCK_IF_REQUIRED expects a CMatchFinderMt *p in scope and fetches a block once the current one is consumed */
+static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
+{ /* May block while waiting for the bt worker; the GetMatches functions do not fetch blocks themselves. */
+  GET_NEXT_BLOCK_IF_REQUIRED;
+  return p->btNumAvailBytes;
+}
+
+static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
+{ /* Updates the 2-byte hash for lzPos and may append one (2, distance - 1) pair; returns the advanced distances pointer. */
+  UInt32 h2, curMatch2;
+  UInt32 *hash = p->hash;
+  const Byte *cur = p->pointerToCurPos;
+  UInt32 lzPos = p->lzPos;
+  MT_HASH2_CALC
+      
+  curMatch2 = hash[h2];
+  hash[h2] = lzPos;
+
+  if (curMatch2 >= matchMinPos) /* entries below matchMinPos are ignored */
+    if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) /* only the first byte is compared here */
+    {
+      *distances++ = 2;
+      *distances++ = lzPos - curMatch2 - 1;
+    }
+  
+  return distances;
+}
+
+static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
+{ /* Updates the 2- and 3-byte hashes for lzPos and appends up to two short-match pairs; returns the advanced pointer. */
+  UInt32 h2, h3, curMatch2, curMatch3;
+  UInt32 *hash = p->hash;
+  const Byte *cur = p->pointerToCurPos;
+  UInt32 lzPos = p->lzPos;
+  MT_HASH3_CALC
+
+  curMatch2 = hash[                h2];
+  curMatch3 = (hash + kFix3HashSize)[h3];
+  
+  hash[                h2] = lzPos;
+  (hash + kFix3HashSize)[h3] = lzPos;
+
+  if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
+  {
+    distances[1] = lzPos - curMatch2 - 1;
+    if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) /* third byte matches too: report length 3 and skip the 3-byte table */
+    {
+      distances[0] = 3;
+      return distances + 2;
+    }
+    distances[0] = 2;
+    distances += 2;
+  }
+  
+  if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
+  {
+    *distances++ = 3;
+    *distances++ = lzPos - curMatch3 - 1;
+  }
+  
+  return distances;
+}
+
+/*
+static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
+{
+  UInt32 h2, h3, h4, curMatch2, curMatch3, curMatch4;
+  UInt32 *hash = p->hash;
+  const Byte *cur = p->pointerToCurPos;
+  UInt32 lzPos = p->lzPos;
+  MT_HASH4_CALC
+      
+  curMatch2 = hash[                h2];
+  curMatch3 = (hash + kFix3HashSize)[h3];
+  curMatch4 = (hash + kFix4HashSize)[h4];
+  
+  hash[                h2] = lzPos;
+  (hash + kFix3HashSize)[h3] = lzPos;
+  (hash + kFix4HashSize)[h4] = lzPos;
+
+  if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
+  {
+    distances[1] = lzPos - curMatch2 - 1;
+    if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
+    {
+      distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3;
+      return distances + 2;
+    }
+    distances[0] = 2;
+    distances += 2;
+  }
+  
+  if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
+  {
+    distances[1] = lzPos - curMatch3 - 1;
+    if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3])
+    {
+      distances[0] = 4;
+      return distances + 2;
+    }
+    distances[0] = 3;
+    distances += 2;
+  }
+
+  if (curMatch4 >= matchMinPos)
+    if (
+      cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] &&
+      cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3]
+      )
+    {
+      *distances++ = 4;
+      *distances++ = lzPos - curMatch4 - 1;
+    }
+  
+  return distances;
+}
+*/
+
+#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
+
+/*
+  GetMatches variant installed for numHashBytes == 2: copies the current
+  position's record (a count word followed by that many values, stored in
+  pairs) from btBuf to `distances` unchanged and returns the count.
+*/
+static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances)
+{
+  const UInt32 *src = p->btBuf + p->btBufPos;
+  const UInt32 count = src[0];
+  UInt32 done;
+  /* step over the count word and the record; one input position is consumed */
+  p->btBufPos += 1 + count;
+  p->btNumAvailBytes--;
+  for (done = 0, src++; done < count; done += 2, src += 2, distances += 2)
+  {
+    distances[0] = src[0];
+    distances[1] = src[1];
+  }
+  INCREASE_LZ_POS
+  return count;
+}
+
+static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances)
+{ /* Writes the short matches from MixMatchesFunc followed by the position's bt record; returns the number of values written. */
+  const UInt32 *btBuf = p->btBuf + p->btBufPos;
+  UInt32 len = *btBuf++;
+  p->btBufPos += 1 + len;
+
+  if (len == 0) /* empty bt record: only the fixed hashes can contribute */
+  {
+    /* change for bt5 ! */
+    if (p->btNumAvailBytes-- >= 4)
+      len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances));
+  }
+  else
+  {
+    /* Condition: there are matches in btBuf with length < p->numHashBytes */
+    UInt32 *distances2;
+    p->btNumAvailBytes--;
+    distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances); /* btBuf[1] is the first bt pair's distance - 1, so short matches must be closer */
+    do
+    {
+      UInt32 v0 = btBuf[0];
+      UInt32 v1 = btBuf[1];
+      btBuf += 2;
+      distances2[0] = v0;
+      distances2[1] = v1;
+      distances2 += 2;
+    }
+    while ((len -= 2) != 0);
+    len = (UInt32)(distances2 - (distances));
+  }
+  INCREASE_LZ_POS
+  return len;
+}
+
+#define SKIP_HEADER2_MT  do { GET_NEXT_BLOCK_IF_REQUIRED
+#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
+#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; } while (--num != 0);
+/* The macros above open and close one do/while over `num` positions and only work as a HEADER ... FOOTER pair. NOTE(review): num == 0 would wrap in --num; callers presumably never pass 0. */
+static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
+{ /* Skips num positions without touching any hash table. */
+  SKIP_HEADER2_MT { p->btNumAvailBytes--;
+  SKIP_FOOTER_MT
+}
+
+static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
+{ /* Skips num positions; the 2-byte hash is updated while at least 2 bytes are available. */
+  SKIP_HEADER_MT(2)
+      UInt32 h2;
+      MT_HASH2_CALC
+      hash[h2] = p->lzPos;
+  SKIP_FOOTER_MT
+}
+
+static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
+{ /* Skips num positions; the 2- and 3-byte hashes are updated while at least 3 bytes are available. */
+  SKIP_HEADER_MT(3)
+      UInt32 h2, h3;
+      MT_HASH3_CALC
+      (hash + kFix3HashSize)[h3] =
+      hash[                h2] =
+        p->lzPos;
+  SKIP_FOOTER_MT
+}
+
+/*
+static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
+{
+  SKIP_HEADER_MT(4)
+      UInt32 h2, h3, h4;
+      MT_HASH4_CALC
+      (hash + kFix4HashSize)[h4] =
+      (hash + kFix3HashSize)[h3] =
+      hash[                h2] =
+        p->lzPos;
+  SKIP_FOOTER_MT
+}
+*/
+
+void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
+{ /* Fills vTable with the Mt callbacks and picks the per-numHashBytes helpers stored in *p. */
+  vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
+  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
+  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
+  vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;
+  /* the default GetMatches above is replaced in case 2, which has no MixMatchesFunc */
+  switch (p->MatchFinder->numHashBytes)
+  {
+    case 2:
+      p->GetHeadsFunc = GetHeads2;
+      p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
+      vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
+      vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
+      break;
+    case 3:
+      p->GetHeadsFunc = GetHeads3;
+      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
+      vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
+      break;
+    default:
+    /* case 4: */
+      p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4;
+      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
+      vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
+      break;
+    /*
+    default:
+      p->GetHeadsFunc = GetHeads5;
+      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
+      vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip;
+      break;
+    */
+  }
+}
diff --git a/libraries/lzma/C/LzFindMt.h b/libraries/lzma/C/LzFindMt.h
new file mode 100644
index 000000000..ef431e3f5
--- /dev/null
+++ b/libraries/lzma/C/LzFindMt.h
@@ -0,0 +1,101 @@
+/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
+2018-07-04 : Igor Pavlov : Public domain */
+
+#ifndef __LZ_FIND_MT_H
+#define __LZ_FIND_MT_H
+
+#include "LzFind.h"
+#include "Threads.h"
+
+EXTERN_C_BEGIN
+
+#define kMtHashBlockSize (1 << 13) /* UInt32 words per hash block */
+#define kMtHashNumBlocks (1 << 3) /* blocks in the hashBuf ring */
+#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1)
+
+#define kMtBtBlockSize (1 << 14) /* UInt32 words per bt block */
+#define kMtBtNumBlocks (1 << 6) /* blocks in the btBuf ring */
+#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1)
+
+typedef struct _CMtSync /* hand-off state between one worker thread and the stage that consumes its blocks */
+{
+  BoolInt wasCreated; /* set once MtSync_Create2 has created everything, including the thread */
+  BoolInt needStart; /* True while the worker is idle; the next MtSync_GetNextBlock starts a run */
+  BoolInt exit; /* tells the worker thread function to return */
+  BoolInt stopWriting; /* tells the worker to end the current run and set wasStopped */
+
+  CThread thread;
+  CAutoResetEvent canStart;
+  CAutoResetEvent wasStarted;
+  CAutoResetEvent wasStopped;
+  CSemaphore freeSemaphore; /* the worker waits on this before filling a block */
+  CSemaphore filledSemaphore; /* the worker releases this after filling a block */
+  BoolInt csWasInitialized;
+  BoolInt csWasEntered; /* tracks whether cs is currently held, so MtSync_StopWriting can leave it */
+  CCriticalSection cs;
+  UInt32 numProcessedBlocks; /* consumer's block counter during a run; the worker stores its own count here when stopping */
+} CMtSync;
+
+typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
+
+/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */
+#define kMtCacheLineDummy 128
+
+typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos,
+  UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc);
+
+typedef struct _CMatchFinderMt /* fields are grouped by the stage that uses them: LZ (caller), BT thread, Hash thread */
+{
+  /* LZ */
+  const Byte *pointerToCurPos;
+  UInt32 *btBuf; /* ring of kMtBtNumBlocks blocks; points into the hashBuf allocation */
+  UInt32 btBufPos;
+  UInt32 btBufPosLimit;
+  UInt32 lzPos;
+  UInt32 btNumAvailBytes;
+
+  UInt32 *hash; /* fixed-size 2/3-byte hash tables, shared with the base CMatchFinder */
+  UInt32 fixedHashSize;
+  UInt32 historySize;
+  const UInt32 *crc;
+
+  Mf_Mix_Matches MixMatchesFunc; /* NULL when numHashBytes == 2 */
+  
+  /* LZ + BT */
+  CMtSync btSync;
+  Byte btDummy[kMtCacheLineDummy];
+
+  /* BT */
+  UInt32 *hashBuf; /* ring of kMtHashNumBlocks blocks; start of the single allocation */
+  UInt32 hashBufPos;
+  UInt32 hashBufPosLimit;
+  UInt32 hashNumAvail;
+
+  CLzRef *son;
+  UInt32 matchMaxLen;
+  UInt32 numHashBytes;
+  UInt32 pos;
+  const Byte *buffer;
+  UInt32 cyclicBufferPos;
+  UInt32 cyclicBufferSize; /* it must be historySize + 1 */
+  UInt32 cutValue;
+
+  /* BT + Hash */
+  CMtSync hashSync;
+  /* Byte hashDummy[kMtCacheLineDummy]; */
+  
+  /* Hash */
+  Mf_GetHeads GetHeadsFunc;
+  CMatchFinder *MatchFinder; /* base finder; MatchFinderMt_Create reads it, so it must be set beforehand */
+} CMatchFinderMt;
+
+void MatchFinderMt_Construct(CMatchFinderMt *p);
+void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc);
+SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
+    UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc);
+void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable);
+void MatchFinderMt_ReleaseStream(CMatchFinderMt *p);
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/C/LzHash.h b/libraries/lzma/C/LzHash.h
new file mode 100644
index 000000000..e7c942303
--- /dev/null
+++ b/libraries/lzma/C/LzHash.h
@@ -0,0 +1,57 @@
+/* LzHash.h -- HASH functions for LZ algorithms
+2015-04-12 : Igor Pavlov : Public domain */
+
+#ifndef __LZ_HASH_H
+#define __LZ_HASH_H
+
+#define kHash2Size (1 << 10)
+#define kHash3Size (1 << 16)
+#define kHash4Size (1 << 20)
+/* kFixNHashSize: offset of the N-byte table in a hash array that starts with the 2-byte table */
+#define kFix3HashSize (kHash2Size)
+#define kFix4HashSize (kHash2Size + kHash3Size)
+#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+/* The *_CALC macros expect `cur`, `p` (p->crc, p->hashMask) and the result variables hv/h2/h3/h4 in the expanding scope. */
+#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8);
+
+#define HASH3_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
+
+#define HASH4_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  temp ^= ((UInt32)cur[2] << 8); \
+  h3 = temp & (kHash3Size - 1); \
+  hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
+
+#define HASH5_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  temp ^= ((UInt32)cur[2] << 8); \
+  h3 = temp & (kHash3Size - 1); \
+  temp ^= (p->crc[cur[3]] << 5); \
+  h4 = temp & (kHash4Size - 1); \
+  hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; }
+
+/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
+#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
+
+
+#define MT_HASH2_CALC \
+  h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); /* same h2 as in HASH3_CALC */
+/* MT_* variants produce only fixed-table indices: no hv, and the last value is masked to a table size, not p->hashMask */
+#define MT_HASH3_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+#define MT_HASH4_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  temp ^= ((UInt32)cur[2] << 8); \
+  h3 = temp & (kHash3Size - 1); \
+  h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
+
+#endif
diff --git a/libraries/lzma/C/Lzma2Dec.c b/libraries/lzma/C/Lzma2Dec.c
new file mode 100644
index 000000000..4e138a4ae
--- /dev/null
+++ b/libraries/lzma/C/Lzma2Dec.c
@@ -0,0 +1,488 @@
+/* Lzma2Dec.c -- LZMA2 Decoder
+2019-02-02 : Igor Pavlov : Public domain */
+
+/* #define SHOW_DEBUG_INFO */
+
+#include "Precomp.h"
+
+#ifdef SHOW_DEBUG_INFO
+#include <stdio.h>
+#endif
+
+#include <string.h>
+
+#include "Lzma2Dec.h"
+
+/*
+00000000  -  End of data
+00000001 U U  -  Uncompressed, reset dic, need reset state and set new prop
+00000010 U U  -  Uncompressed, no reset
+100uuuuu U U P P  -  LZMA, no reset
+101uuuuu U U P P  -  LZMA, reset state
+110uuuuu U U P P S  -  LZMA, reset state + set new prop
+111uuuuu U U P P S  -  LZMA, reset state + set new prop, reset dic
+
+  u, U - Unpack Size
+  P - Pack Size
+  S - Props
+*/
+
+#define LZMA2_CONTROL_COPY_RESET_DIC 1 /* control byte 0x01: uncompressed chunk that also resets the dictionary */
+
+#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & (1 << 7)) == 0) /* true when bit 7 of the control byte is clear */
+
+#define LZMA2_LCLP_MAX 4 /* upper bound for (lc + lp), enforced when a props byte is parsed */
+#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11)) /* (2 or 3) << (p / 2 + 11) */
+
+#ifdef SHOW_DEBUG_INFO
+#define PRF(x) x /* debug build: the trace statement is kept */
+#else
+#define PRF(x) /* normal build: the trace statement expands to nothing */
+#endif
+
+typedef enum
+{
+  LZMA2_STATE_CONTROL,   /* next byte is a chunk control byte */
+  LZMA2_STATE_UNPACK0,   /* next byte is bits 8..15 of (unpack size - 1) */
+  LZMA2_STATE_UNPACK1,   /* next byte is bits 0..7 of (unpack size - 1) */
+  LZMA2_STATE_PACK0,     /* next byte is bits 8..15 of (pack size - 1) */
+  LZMA2_STATE_PACK1,     /* next byte is bits 0..7 of (pack size - 1) */
+  LZMA2_STATE_PROP,      /* next byte is the lc/lp/pb props byte */
+  LZMA2_STATE_DATA,      /* header complete; no chunk data handled yet */
+  LZMA2_STATE_DATA_CONT, /* in the middle of chunk data */
+  LZMA2_STATE_FINISHED,  /* control byte 0x00 was read */
+  LZMA2_STATE_ERROR      /* sticky error state */
+} ELzma2State;
+/* Builds the 5-byte LZMA props header in props[] from an LZMA2 dictionary-size code; codes above 40 are rejected and props[] is left untouched. */
+static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props)
+{
+  UInt32 dict = 0xFFFFFFFF; /* value used for prop == 40 */
+  unsigned k;
+  if (prop > 40)
+    return SZ_ERROR_UNSUPPORTED;
+  if (prop < 40)
+    dict = LZMA2_DIC_SIZE_FROM_PROP(prop);
+  props[0] = (Byte)LZMA2_LCLP_MAX;
+  for (k = 0; k < 4; k++) /* dictionary size, low byte first */
+    props[1 + k] = (Byte)(dict >> (8 * k));
+  return SZ_OK;
+}
+/* Converts the LZMA2 prop byte to an LZMA props header and forwards it to LzmaDec_AllocateProbs() for the wrapped decoder. */
+SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
+{
+  Byte props[LZMA_PROPS_SIZE];
+  RINOK(Lzma2Dec_GetOldProps(prop, props)); /* RINOK presumably returns early on a non-OK result - macro defined elsewhere */
+  return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
+}
+/* Same conversion as Lzma2Dec_AllocateProbs(), but forwards to LzmaDec_Allocate() (presumably also allocates the dictionary - confirm in LzmaDec.h). */
+SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
+{
+  Byte props[LZMA_PROPS_SIZE];
+  RINOK(Lzma2Dec_GetOldProps(prop, props)); /* RINOK presumably returns early on a non-OK result - macro defined elsewhere */
+  return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
+}
+/* Puts the LZMA2 framing state back to start-of-stream and fully re-initialises the wrapped LZMA decoder. */
+void Lzma2Dec_Init(CLzma2Dec *p)
+{
+  /* Full init of the wrapped decoder; the author notes that "p->decoder.dicPos = 0" could be used instead. */
+  LzmaDec_Init(&p->decoder);
+
+  p->unpackSize = 0;
+  p->isExtraMode = False;
+  p->needInitLevel = 0xE0; /* first chunk must be 0x01 or >= 0xE0, see Lzma2Dec_UpdateState() */
+  p->state = LZMA2_STATE_CONTROL;
+}
+/* Feeds one chunk-header byte b to the header state machine selected by p->state and returns the next state (the caller stores it). */
+static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
+{
+  switch (p->state)
+  {
+    case LZMA2_STATE_CONTROL:
+      p->isExtraMode = False;
+      p->control = b;
+      PRF(printf("\n %8X", (unsigned)p->decoder.dicPos));
+      PRF(printf(" %02X", (unsigned)b));
+      if (b == 0) /* 0x00 = end of data */
+        return LZMA2_STATE_FINISHED;
+      if (LZMA2_IS_UNCOMPRESSED_STATE(p))
+      {
+        if (b == LZMA2_CONTROL_COPY_RESET_DIC)
+          p->needInitLevel = 0xC0; /* dictionary reset done; the next LZMA chunk must still be >= 0xC0 */
+        else if (b > 2 || p->needInitLevel == 0xE0) /* unknown code, or no dictionary reset seen yet */
+          return LZMA2_STATE_ERROR;
+      }
+      else
+      {
+        if (b < p->needInitLevel) /* chunk does not perform the resets that are still required */
+          return LZMA2_STATE_ERROR;
+        p->needInitLevel = 0;
+        p->unpackSize = (UInt32)(b & 0x1F) << 16; /* low 5 control bits = bits 16..20 of (unpack size - 1) */
+      }
+      return LZMA2_STATE_UNPACK0;
+    
+    case LZMA2_STATE_UNPACK0:
+      p->unpackSize |= (UInt32)b << 8; /* for uncompressed chunks this relies on unpackSize being 0 here (Init and chunk completion leave it 0) */
+      return LZMA2_STATE_UNPACK1;
+    
+    case LZMA2_STATE_UNPACK1:
+      p->unpackSize |= (UInt32)b;
+      p->unpackSize++; /* stored value is size - 1 */
+      PRF(printf(" %7u", (unsigned)p->unpackSize));
+      return LZMA2_IS_UNCOMPRESSED_STATE(p) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0;
+    
+    case LZMA2_STATE_PACK0:
+      p->packSize = (UInt32)b << 8;
+      return LZMA2_STATE_PACK1;
+
+    case LZMA2_STATE_PACK1:
+      p->packSize |= (UInt32)b;
+      p->packSize++; /* stored value is size - 1 */
+      // if (p->packSize < 5) return LZMA2_STATE_ERROR;
+      PRF(printf(" %5u", (unsigned)p->packSize));
+      return (p->control & 0x40) ? LZMA2_STATE_PROP : LZMA2_STATE_DATA; /* bit 6 set: a props byte follows */
+
+    case LZMA2_STATE_PROP:
+    {
+      unsigned lc, lp;
+      if (b >= (9 * 5 * 5))
+        return LZMA2_STATE_ERROR;
+      lc = b % 9; /* props byte = (pb * 5 + lp) * 9 + lc */
+      b /= 9;
+      p->decoder.prop.pb = (Byte)(b / 5);
+      lp = b % 5;
+      if (lc + lp > LZMA2_LCLP_MAX)
+        return LZMA2_STATE_ERROR;
+      p->decoder.prop.lc = (Byte)lc;
+      p->decoder.prop.lp = (Byte)lp;
+      return LZMA2_STATE_DATA;
+    }
+  }
+  return LZMA2_STATE_ERROR; /* reached for any state that has no case above */
+}
+/* Copies size raw bytes from src to the current dictionary position and advances dicPos / processedPos accordingly. */
+static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)
+{
+  memcpy(&p->dic[p->dicPos], src, size);
+  p->dicPos = p->dicPos + size;
+  if (p->checkDicSize == 0 && size >= p->prop.dicSize - p->processedPos) /* processedPos reaches dicSize with this copy */
+    p->checkDicSize = p->prop.dicSize;
+  p->processedPos = p->processedPos + (UInt32)size;
+}
+/* Not defined in this file; called below before the first data of a chunk with reset flags derived from the control byte. */
+void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
+/* Decodes LZMA2 chunks from src into p->decoder.dic until dicPos reaches dicLimit, more input is needed, or the end marker is read. */
+/* *srcLen: in = available bytes, out = bytes consumed. Every `break` below ends in the sticky error state and SZ_ERROR_DATA. */
+SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+  SizeT inSize = *srcLen;
+  *srcLen = 0;
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+
+  while (p->state != LZMA2_STATE_ERROR)
+  {
+    SizeT dicPos;
+
+    if (p->state == LZMA2_STATE_FINISHED)
+    {
+      *status = LZMA_STATUS_FINISHED_WITH_MARK;
+      return SZ_OK;
+    }
+    
+    dicPos = p->decoder.dicPos;
+    
+    if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY)
+    {
+      *status = LZMA_STATUS_NOT_FINISHED;
+      return SZ_OK;
+    }
+
+    if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) /* still inside a chunk header */
+    {
+      if (*srcLen == inSize)
+      {
+        *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+        return SZ_OK;
+      }
+      (*srcLen)++;
+      p->state = Lzma2Dec_UpdateState(p, *src++);
+      if (dicPos == dicLimit && p->state != LZMA2_STATE_FINISHED) /* output is full (finishMode is not ANY here): only the end marker is acceptable */
+        break;
+      continue;
+    }
+    
+    {
+      SizeT inCur = inSize - *srcLen;
+      SizeT outCur = dicLimit - dicPos;
+      ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY;
+      
+      if (outCur >= p->unpackSize) /* the rest of this chunk fits into the output window */
+      {
+        outCur = (SizeT)p->unpackSize;
+        curFinishMode = LZMA_FINISH_END;
+      }
+
+      if (LZMA2_IS_UNCOMPRESSED_STATE(p))
+      {
+        if (inCur == 0)
+        {
+          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+          return SZ_OK;
+        }
+
+        if (p->state == LZMA2_STATE_DATA) /* first data of this chunk */
+        {
+          BoolInt initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC);
+          LzmaDec_InitDicAndState(&p->decoder, initDic, False);
+        }
+
+        if (inCur > outCur)
+          inCur = outCur;
+        if (inCur == 0) /* no output room left */
+          break;
+
+        LzmaDec_UpdateWithUncompressed(&p->decoder, src, inCur);
+
+        src += inCur;
+        *srcLen += inCur;
+        p->unpackSize -= (UInt32)inCur;
+        p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
+      }
+      else
+      {
+        SRes res;
+
+        if (p->state == LZMA2_STATE_DATA) /* first data of this chunk */
+        {
+          BoolInt initDic = (p->control >= 0xE0);
+          BoolInt initState = (p->control >= 0xA0);
+          LzmaDec_InitDicAndState(&p->decoder, initDic, initState);
+          p->state = LZMA2_STATE_DATA_CONT;
+        }
+  
+        if (inCur > p->packSize) /* never feed the LZMA decoder bytes beyond this chunk */
+          inCur = (SizeT)p->packSize;
+        
+        res = LzmaDec_DecodeToDic(&p->decoder, dicPos + outCur, src, &inCur, curFinishMode, status);
+
+        src += inCur;
+        *srcLen += inCur;
+        p->packSize -= (UInt32)inCur;
+        outCur = p->decoder.dicPos - dicPos; /* bytes actually produced by this call */
+        p->unpackSize -= (UInt32)outCur;
+
+        if (res != 0) /* any inner error is reported as SZ_ERROR_DATA by the exit path below */
+          break;
+        
+        if (*status == LZMA_STATUS_NEEDS_MORE_INPUT)
+        {
+          if (p->packSize == 0) /* the chunk's packed bytes are used up, yet the decoder wants more */
+            break;
+          return SZ_OK;
+        }
+
+        if (inCur == 0 && outCur == 0) /* no progress: valid only when the chunk is exactly complete */
+        {
+          if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+              || p->unpackSize != 0
+              || p->packSize != 0)
+            break;
+          p->state = LZMA2_STATE_CONTROL;
+        }
+        
+        *status = LZMA_STATUS_NOT_SPECIFIED;
+      }
+    }
+  }
+  
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+  p->state = LZMA2_STATE_ERROR;
+  return SZ_ERROR_DATA;
+}
+
+/* Walks LZMA2 chunk headers in src without decoding chunk data: only the declared sizes are tracked, and */
+/* p->decoder.dicPos is advanced by the declared unpack sizes. outSize limits how much output may be accounted for. */
+/* *srcLen: in = available bytes, out = bytes consumed. Returns an LZMA_STATUS_* or LZMA2_PARSE_STATUS_* value. */
+ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
+    SizeT outSize,
+    const Byte *src, SizeT *srcLen,
+    int checkFinishBlock)
+{
+  SizeT inSize = *srcLen;
+  *srcLen = 0;
+
+  while (p->state != LZMA2_STATE_ERROR)
+  {
+    if (p->state == LZMA2_STATE_FINISHED)
+      return (ELzma2ParseStatus)LZMA_STATUS_FINISHED_WITH_MARK;
+
+    if (outSize == 0 && !checkFinishBlock)
+      return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
+    
+    if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) /* still inside a chunk header */
+    {
+      if (*srcLen == inSize)
+        return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
+      (*srcLen)++;
+
+      p->state = Lzma2Dec_UpdateState(p, *src++);
+
+      if (p->state == LZMA2_STATE_UNPACK0) /* a control byte was just accepted */
+      {
+        // if (p->decoder.dicPos != 0)
+        if (p->control == LZMA2_CONTROL_COPY_RESET_DIC || p->control >= 0xE0) /* chunk resets the dictionary */
+          return LZMA2_PARSE_STATUS_NEW_BLOCK;
+        // if (outSize == 0) return LZMA_STATUS_NOT_FINISHED;
+      }
+
+      // The following code can be commented.
+      // It's not big problem, if we read additional input bytes.
+      // It will be stopped later in LZMA2_STATE_DATA / LZMA2_STATE_DATA_CONT state.
+
+      if (outSize == 0 && p->state != LZMA2_STATE_FINISHED)
+      {
+        // checkFinishBlock is true. So we expect that block must be finished,
+        // We can return LZMA_STATUS_NOT_SPECIFIED or LZMA_STATUS_NOT_FINISHED here
+        // break;
+        return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
+      }
+
+      if (p->state == LZMA2_STATE_DATA) /* header fully read */
+        return LZMA2_PARSE_STATUS_NEW_CHUNK;
+
+      continue;
+    }
+
+    if (outSize == 0)
+      return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
+
+    {
+      SizeT inCur = inSize - *srcLen;
+
+      if (LZMA2_IS_UNCOMPRESSED_STATE(p))
+      {
+        if (inCur == 0)
+          return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
+        if (inCur > p->unpackSize)
+          inCur = p->unpackSize;
+        if (inCur > outSize)
+          inCur = outSize;
+        p->decoder.dicPos += inCur; /* input and output advance together; no bytes are copied here */
+        src += inCur;
+        *srcLen += inCur;
+        outSize -= inCur;
+        p->unpackSize -= (UInt32)inCur;
+        p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
+      }
+      else
+      {
+        p->isExtraMode = True; /* read by Lzma2Dec_GetUnpackExtra() */
+
+        if (inCur == 0)
+        {
+          if (p->packSize != 0)
+            return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
+        }
+        else if (p->state == LZMA2_STATE_DATA)
+        {
+          p->state = LZMA2_STATE_DATA_CONT;
+          if (*src != 0)
+          {
+            // first byte of lzma chunk must be Zero
+            *srcLen += 1;
+            p->packSize--;
+            break;
+          }
+        }
+  
+        if (inCur > p->packSize)
+          inCur = (SizeT)p->packSize;
+
+        src += inCur;
+        *srcLen += inCur;
+        p->packSize -= (UInt32)inCur;
+
+        if (p->packSize == 0) /* whole packed chunk skipped: account for its output, up to outSize */
+        {
+          SizeT rem = outSize;
+          if (rem > p->unpackSize)
+            rem = p->unpackSize;
+          p->decoder.dicPos += rem;
+          p->unpackSize -= (UInt32)rem;
+          outSize -= rem;
+          if (p->unpackSize == 0)
+            p->state = LZMA2_STATE_CONTROL;
+        }
+      }
+    }
+  }
+  
+  p->state = LZMA2_STATE_ERROR;
+  return (ELzma2ParseStatus)LZMA_STATUS_NOT_SPECIFIED;
+}
+
+/* Decodes into the caller's buffer dest: each round decodes into the internal dictionary via Lzma2Dec_DecodeToDic() */
+/* and copies the newly produced bytes out. dicPos wraps to 0 once it reaches dicBufSize. */
+/* *destLen / *srcLen: in = capacity / available bytes, out = bytes written / consumed. */
+SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+  SizeT outSize = *destLen, inSize = *srcLen;
+  *srcLen = *destLen = 0;
+  
+  for (;;)
+  {
+    SizeT inCur = inSize, outCur, dicPos;
+    ELzmaFinishMode curFinishMode;
+    SRes res;
+    
+    if (p->decoder.dicPos == p->decoder.dicBufSize)
+      p->decoder.dicPos = 0;
+    dicPos = p->decoder.dicPos;
+    curFinishMode = LZMA_FINISH_ANY;
+    outCur = p->decoder.dicBufSize - dicPos; /* room left before the dictionary buffer wraps */
+    
+    if (outCur >= outSize) /* the caller's remaining space is the tighter limit: apply the caller's finishMode */
+    {
+      outCur = outSize;
+      curFinishMode = finishMode;
+    }
+
+    res = Lzma2Dec_DecodeToDic(p, dicPos + outCur, src, &inCur, curFinishMode, status);
+    
+    src += inCur;
+    inSize -= inCur;
+    *srcLen += inCur;
+    outCur = p->decoder.dicPos - dicPos; /* bytes produced in this round */
+    memcpy(dest, p->decoder.dic + dicPos, outCur);
+    dest += outCur;
+    outSize -= outCur;
+    *destLen += outCur;
+    if (res != 0)
+      return res;
+    if (outCur == 0 || outSize == 0) /* no progress, or dest is full */
+      return SZ_OK;
+  }
+}
+
+/* One-call decode of src into dest; dest itself is used as the dictionary buffer, only the probability tables are allocated through alloc. */
+SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc)
+{
+  CLzma2Dec p;
+  SRes res;
+  SizeT outSize = *destLen, inSize = *srcLen;
+  *destLen = *srcLen = 0;
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+  Lzma2Dec_Construct(&p);
+  RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc)); /* RINOK presumably returns early on a non-OK result - macro defined elsewhere */
+  p.decoder.dic = dest;
+  p.decoder.dicBufSize = outSize;
+  Lzma2Dec_Init(&p);
+  *srcLen = inSize; /* DecodeToDic takes the available input size through *srcLen */
+  res = Lzma2Dec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
+  *destLen = p.decoder.dicPos;
+  if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) /* truncated input is an error for the one-call interface */
+    res = SZ_ERROR_INPUT_EOF;
+  Lzma2Dec_FreeProbs(&p, alloc);
+  return res;
+}
diff --git a/libraries/lzma/C/Lzma2Dec.h b/libraries/lzma/C/Lzma2Dec.h
new file mode 100644
index 000000000..b8ddeac89
--- /dev/null
+++ b/libraries/lzma/C/Lzma2Dec.h
@@ -0,0 +1,120 @@
+/* Lzma2Dec.h -- LZMA2 Decoder
+2018-02-19 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA2_DEC_H
+#define __LZMA2_DEC_H
+
+#include "LzmaDec.h"
+
+EXTERN_C_BEGIN
+
+/* ---------- State Interface ---------- */
+
+typedef struct
+{
+  unsigned state;      /* header/data parser state; Lzma2Dec_Init() sets it to LZMA2_STATE_CONTROL */
+  Byte control;        /* control byte of the current chunk */
+  Byte needInitLevel;  /* smallest LZMA control byte accepted next; 0xE0 after Lzma2Dec_Init() */
+  Byte isExtraMode;    /* set by Lzma2Dec_Parse() inside an LZMA chunk; read by Lzma2Dec_GetUnpackExtra() */
+  Byte _pad_;
+  UInt32 packSize;     /* packed bytes of the current LZMA chunk not yet consumed */
+  UInt32 unpackSize;   /* output bytes of the current chunk not yet produced */
+  CLzmaDec decoder;    /* wrapped LZMA decoder (dic, dicPos, prop, ...) */
+} CLzma2Dec;
+
+#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder) /* forwards to the wrapped LZMA decoder */
+#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc) /* forwards; Lzma2Decode() pairs it with Lzma2Dec_AllocateProbs() */
+#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc) /* forwards to the wrapped LZMA decoder */
+
+SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); /* prop: LZMA2 dictionary-size code; values above 40 give SZ_ERROR_UNSUPPORTED */
+SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); /* same prop handling as Lzma2Dec_AllocateProbs() */
+void Lzma2Dec_Init(CLzma2Dec *p); /* resets to start-of-stream state */
+
+/*
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen or dicLimit).
+  LZMA_FINISH_ANY - use smallest number of input bytes
+  LZMA_FINISH_END - read EndOfStream marker after decoding
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_NEEDS_MORE_INPUT
+  SZ_ERROR_DATA - Data error
+*/
+
+SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- LZMA2 block and chunk parsing ---------- */
+
+/*
+Lzma2Dec_Parse() parses compressed data stream up to next independent block or next chunk data.
+It can return LZMA_STATUS_* code or LZMA2_PARSE_STATUS_* code:
+  - LZMA2_PARSE_STATUS_NEW_BLOCK - there is new block, and 1 additional byte (control byte of next block header) was read from input.
+  - LZMA2_PARSE_STATUS_NEW_CHUNK - there is new chunk, and only lzma2 header of new chunk was read.
+                                   CLzma2Dec::unpackSize contains unpack size of that chunk
+*/
+
+typedef enum
+{
+/*
+  LZMA_STATUS_NOT_SPECIFIED                 // data error
+  LZMA_STATUS_FINISHED_WITH_MARK
+  LZMA_STATUS_NOT_FINISHED                  //
+  LZMA_STATUS_NEEDS_MORE_INPUT
+  LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK   // unused
+*/
+  LZMA2_PARSE_STATUS_NEW_BLOCK = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + 1, /* numbered after the last ELzmaStatus value listed above */
+  LZMA2_PARSE_STATUS_NEW_CHUNK /* a complete chunk header was read; chunk data comes next */
+} ELzma2ParseStatus;
+
+ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
+    SizeT outSize,   // output size
+    const Byte *src, SizeT *srcLen,
+    int checkFinishBlock   // set (checkFinishBlock = 1), if it must read full input data, if decoder.dicPos reaches blockMax position.
+    );
+
+/*
+LZMA2 parser doesn't decode LZMA chunks, so we must read
+  full input LZMA chunk to decode some part of LZMA chunk.
+
+Lzma2Dec_GetUnpackExtra() returns the value that shows
+    max possible number of output bytes that can be output by decoder
+    at current input position.
+*/
+
+#define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0) /* expression macro: (p)->unpackSize in extra mode, else 0; no trailing ';' so it can be used inside expressions */
+
+
+/* ---------- One Call Interface ---------- */
+
+/*
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - use smallest number of input bytes
+  LZMA_FINISH_END - read EndOfStream marker after decoding
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+  SZ_ERROR_DATA - Data error
+  SZ_ERROR_MEM  - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+  SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+*/
+
+SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc);
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/C/LzmaDec.c b/libraries/lzma/C/LzmaDec.c
new file mode 100644
index 000000000..ba3e1dd50
--- /dev/null
+++ b/libraries/lzma/C/LzmaDec.c
@@ -0,0 +1,1185 @@
+/* LzmaDec.c -- LZMA Decoder
+2018-07-04 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+/* #include "CpuArch.h" */
+#include "LzmaDec.h"
+
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits) /* NORMALIZE shifts in a byte while range is below this */
+
+#define kNumBitModelTotalBits 11 /* range is scaled down by this many bits before multiplying with a probability */
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5 /* adaptation shift used by UPDATE_0 / UPDATE_1 */
+
+#define RC_INIT_SIZE 5 /* presumably the byte count consumed when the range coder is initialised - usage not visible here */
+/* The macros below use the locals range, code, buf, bound and ttt of the function they are expanded in. */
+#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
+/* IF_BIT_0 opens an `if` that is taken when the decoded bit is 0; UPDATE_0 / UPDATE_1 must follow in the matching branch. */
+#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
+#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
+  { UPDATE_0(p); i = (i + i); A0; } else \
+  { UPDATE_1(p); i = (i + i) + 1; A1; }
+/* TREE_GET_BIT: one step down a bit tree; i becomes 2*i or 2*i + 1. */
+#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
+/* REV_BIT*: decode one bit with probability p[i]; A0 / A1 update the index for bit 0 / bit 1. */
+#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
+  { UPDATE_0(p + i); A0; } else \
+  { UPDATE_1(p + i); A1; }
+#define REV_BIT_VAR(  p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
+#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m;       , i += m * 2; )
+#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m        , ; )
+/* TREE_DECODE: start at i = 1, step until i >= limit, then subtract limit to get the symbol. */
+#define TREE_DECODE(probs, limit, i) \
+  { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
+
+/* #define _LZMA_SIZE_OPT */
+/* With _LZMA_SIZE_OPT the 6-bit tree decode is a loop (TREE_DECODE); otherwise it is unrolled into six TREE_GET_BIT steps. */
+#ifdef _LZMA_SIZE_OPT
+#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
+#else
+#define TREE_6_DECODE(probs, i) \
+  { i = 1; \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  i -= 0x40; }
+#endif
+/* Literal bit steps; both use the locals prob and symbol, MATCHED_LITER_DEC also matchByte, offs, bit and probLit. */
+#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol)
+#define MATCHED_LITER_DEC \
+  matchByte += matchByte; \
+  bit = offs; \
+  offs &= matchByte; \
+  probLit = prob + (offs + bit + symbol); \
+  GET_BIT2(probLit, symbol, offs ^= bit; , ;)
+
+
+/* *_CHECK variants: same bit decoding, but they return DUMMY_ERROR when buf reaches bufLimit and never write to the probabilities. */
+#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
+
+#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+#define UPDATE_0_CHECK range = bound;
+#define UPDATE_1_CHECK range -= bound; code -= bound;
+#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
+  { UPDATE_0_CHECK; i = (i + i); A0; } else \
+  { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
+#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
+#define TREE_DECODE_CHECK(probs, limit, i) \
+  { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
+
+/* Same index updates as REV_BIT_VAR, without the probability update. */
+#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
+  { UPDATE_0_CHECK; i += m; m += m; } else \
+  { UPDATE_1_CHECK; m += m; i += m; }
+
+
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax) /* = 16 */
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits) /* = 8 */
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits) /* = 256 */
+/* Offsets inside one length coder (probabilities relative to its base): */
+#define LenLow 0
+#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits)) /* = 256 */
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols) /* = 512 probabilities per length coder */
+
+#define LenChoice LenLow
+#define LenChoice2 (LenLow + (1 << kLenNumLowBits)) /* = 8 */
+
+#define kNumStates 12
+#define kNumStates2 16
+#define kNumLitStates 7 /* states below this value take the NORMAL literal path in the decoder */
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) /* = 128 */
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits) /* = 16 */
+
+#define kMatchMinLen 2
+#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) /* = 274; larger than any normal remaining length, see the p->remainLen note below */
+
+/* External ASM code needs same CLzmaProb array layout. So don't change it. */
+
+/* (probs_1664) is faster and better for code size at some platforms */
+/*
+#ifdef MY_CPU_X86_OR_AMD64
+*/
+#define kStartOffset 1664
+#define GET_PROBS p->probs_1664
+/*
+#define GET_PROBS p->probs + kStartOffset
+#else
+#define kStartOffset 0
+#define GET_PROBS p->probs
+#endif
+*/
+/* Table offsets relative to the GET_PROBS pointer; the values in comments hold for kStartOffset = 1664. */
+#define SpecPos (-kStartOffset) /* -1664 */
+#define IsRep0Long (SpecPos + kNumFullDistances) /* -1536 */
+#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax)) /* -1280 */
+#define LenCoder (RepLenCoder + kNumLenProbs) /* -768 */
+#define IsMatch (LenCoder + kNumLenProbs) /* -256 */
+#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax)) /* 0 */
+#define IsRep (Align + kAlignTableSize) /* 16 */
+#define IsRepG0 (IsRep + kNumStates) /* 28 */
+#define IsRepG1 (IsRepG0 + kNumStates) /* 40 */
+#define IsRepG2 (IsRepG1 + kNumStates) /* 52 */
+#define PosSlot (IsRepG2 + kNumStates) /* 64 */
+#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) /* 320 */
+#define NUM_BASE_PROBS (Literal + kStartOffset) /* 1984 probabilities before the literal tables */
+/* Compile-time checks of the layout above. */
+#if Align != 0 && kStartOffset != 0
+  #error Stop_Compiling_Bad_LZMA_kAlign
+#endif
+
+#if NUM_BASE_PROBS != 1984
+  #error Stop_Compiling_Bad_LZMA_PROBS
+#endif
+
+
+#define LZMA_LIT_SIZE 0x300 /* probabilities per literal context; scaled by 2^(lc + lp) below */
+
+#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
+
+/* posState is kept pre-shifted by 4 so that it can be added to state directly (COMBINED_PS_STATE). */
+#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4)
+#define COMBINED_PS_STATE (posState + state)
+#define GET_LEN_STATE (posState)
+
+#define LZMA_DIC_MIN (1 << 12) /* presumably the minimum dictionary size - usage not visible here */
+
+/*
+p->remainLen : shows status of LZMA decoder:
+    < kMatchSpecLenStart : normal remain
+    = kMatchSpecLenStart : finished
+    = kMatchSpecLenStart + 1 : need init range coder
+    = kMatchSpecLenStart + 2 : need init range coder and state
+*/
+
+/* ---------- LZMA_DECODE_REAL ---------- */
+/*
+LzmaDec_DecodeReal_3() can be implemented in external ASM file.
+3 - is the code compatibility version of that function for check at link time.
+*/
+
+#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3
+
+/*
+LZMA_DECODE_REAL()
+In:
+  RangeCoder is normalized
+  if (p->dicPos == limit)
+  {
+    LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
+    So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol
+    is not END_OF_PAYLOAD_MARKER, then function returns error code.
+  }
+
+Processing:
+  first LZMA symbol will be decoded in any case
+  All checks for limits are at the end of main loop,
+  It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
+  RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
+
+Out:
+  RangeCoder is normalized
+  Result:
+    SZ_OK - OK
+    SZ_ERROR_DATA - Error
+  p->remainLen:
+    < kMatchSpecLenStart : normal remain
+    = kMatchSpecLenStart : finished
+*/
+
+
+#ifdef _LZMA_DEC_OPT
+
+int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
+
+#else
+
+static
+int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+  CLzmaProb *probs = GET_PROBS;
+  unsigned state = (unsigned)p->state;
+  UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
+  unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
+  unsigned lc = p->prop.lc;
+  unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
+
+  Byte *dic = p->dic;
+  SizeT dicBufSize = p->dicBufSize;
+  SizeT dicPos = p->dicPos;
+  
+  UInt32 processedPos = p->processedPos;
+  UInt32 checkDicSize = p->checkDicSize;
+  unsigned len = 0;
+
+  const Byte *buf = p->buf;
+  UInt32 range = p->range;
+  UInt32 code = p->code;
+
+  do
+  {
+    CLzmaProb *prob;
+    UInt32 bound;
+    unsigned ttt;
+    unsigned posState = CALC_POS_STATE(processedPos, pbMask);
+
+    prob = probs + IsMatch + COMBINED_PS_STATE;
+    IF_BIT_0(prob)
+    {
+      unsigned symbol;
+      UPDATE_0(prob);
+      prob = probs + Literal;
+      if (processedPos != 0 || checkDicSize != 0)
+        prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
+      processedPos++;
+
+      if (state < kNumLitStates)
+      {
+        state -= (state < 4) ? state : 3;
+        symbol = 1;
+        #ifdef _LZMA_SIZE_OPT
+        do { NORMAL_LITER_DEC } while (symbol < 0x100);
+        #else
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        #endif
+      }
+      else
+      {
+        unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+        unsigned offs = 0x100;
+        state -= (state < 10) ? 3 : 6;
+        symbol = 1;
+        #ifdef _LZMA_SIZE_OPT
+        do
+        {
+          unsigned bit;
+          CLzmaProb *probLit;
+          MATCHED_LITER_DEC
+        }
+        while (symbol < 0x100);
+        #else
+        {
+          unsigned bit;
+          CLzmaProb *probLit;
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+        }
+        #endif
+      }
+
+      dic[dicPos++] = (Byte)symbol;
+      continue;
+    }
+    
+    {
+      UPDATE_1(prob);
+      prob = probs + IsRep + state;
+      IF_BIT_0(prob)
+      {
+        UPDATE_0(prob);
+        state += kNumStates;
+        prob = probs + LenCoder;
+      }
+      else
+      {
+        UPDATE_1(prob);
+        /*
+        // that case was checked before with kBadRepCode
+        if (checkDicSize == 0 && processedPos == 0)
+          return SZ_ERROR_DATA;
+        */
+        prob = probs + IsRepG0 + state;
+        IF_BIT_0(prob)
+        {
+          UPDATE_0(prob);
+          prob = probs + IsRep0Long + COMBINED_PS_STATE;
+          IF_BIT_0(prob)
+          {
+            UPDATE_0(prob);
+            dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+            dicPos++;
+            processedPos++;
+            state = state < kNumLitStates ? 9 : 11;
+            continue;
+          }
+          UPDATE_1(prob);
+        }
+        else
+        {
+          UInt32 distance;
+          UPDATE_1(prob);
+          prob = probs + IsRepG1 + state;
+          IF_BIT_0(prob)
+          {
+            UPDATE_0(prob);
+            distance = rep1;
+          }
+          else
+          {
+            UPDATE_1(prob);
+            prob = probs + IsRepG2 + state;
+            IF_BIT_0(prob)
+            {
+              UPDATE_0(prob);
+              distance = rep2;
+            }
+            else
+            {
+              UPDATE_1(prob);
+              distance = rep3;
+              rep3 = rep2;
+            }
+            rep2 = rep1;
+          }
+          rep1 = rep0;
+          rep0 = distance;
+        }
+        state = state < kNumLitStates ? 8 : 11;
+        prob = probs + RepLenCoder;
+      }
+      
+      #ifdef _LZMA_SIZE_OPT
+      {
+        unsigned lim, offset;
+        CLzmaProb *probLen = prob + LenChoice;
+        IF_BIT_0(probLen)
+        {
+          UPDATE_0(probLen);
+          probLen = prob + LenLow + GET_LEN_STATE;
+          offset = 0;
+          lim = (1 << kLenNumLowBits);
+        }
+        else
+        {
+          UPDATE_1(probLen);
+          probLen = prob + LenChoice2;
+          IF_BIT_0(probLen)
+          {
+            UPDATE_0(probLen);
+            probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+            offset = kLenNumLowSymbols;
+            lim = (1 << kLenNumLowBits);
+          }
+          else
+          {
+            UPDATE_1(probLen);
+            probLen = prob + LenHigh;
+            offset = kLenNumLowSymbols * 2;
+            lim = (1 << kLenNumHighBits);
+          }
+        }
+        TREE_DECODE(probLen, lim, len);
+        len += offset;
+      }
+      #else
+      {
+        CLzmaProb *probLen = prob + LenChoice;
+        IF_BIT_0(probLen)
+        {
+          UPDATE_0(probLen);
+          probLen = prob + LenLow + GET_LEN_STATE;
+          len = 1;
+          TREE_GET_BIT(probLen, len);
+          TREE_GET_BIT(probLen, len);
+          TREE_GET_BIT(probLen, len);
+          len -= 8;
+        }
+        else
+        {
+          UPDATE_1(probLen);
+          probLen = prob + LenChoice2;
+          IF_BIT_0(probLen)
+          {
+            UPDATE_0(probLen);
+            probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+            len = 1;
+            TREE_GET_BIT(probLen, len);
+            TREE_GET_BIT(probLen, len);
+            TREE_GET_BIT(probLen, len);
+          }
+          else
+          {
+            UPDATE_1(probLen);
+            probLen = prob + LenHigh;
+            TREE_DECODE(probLen, (1 << kLenNumHighBits), len);
+            len += kLenNumLowSymbols * 2;
+          }
+        }
+      }
+      #endif
+
+      if (state >= kNumStates)
+      {
+        UInt32 distance;
+        prob = probs + PosSlot +
+            ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
+        TREE_6_DECODE(prob, distance);
+        if (distance >= kStartPosModelIndex)
+        {
+          unsigned posSlot = (unsigned)distance;
+          unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
+          distance = (2 | (distance & 1));
+          if (posSlot < kEndPosModelIndex)
+          {
+            distance <<= numDirectBits;
+            prob = probs + SpecPos;
+            {
+              UInt32 m = 1;
+              distance++;
+              do
+              {
+                REV_BIT_VAR(prob, distance, m);
+              }
+              while (--numDirectBits);
+              distance -= m;
+            }
+          }
+          else
+          {
+            numDirectBits -= kNumAlignBits;
+            do
+            {
+              NORMALIZE
+              range >>= 1;
+              
+              {
+                UInt32 t;
+                code -= range;
+                t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
+                distance = (distance << 1) + (t + 1);
+                code += range & t;
+              }
+              /*
+              distance <<= 1;
+              if (code >= range)
+              {
+                code -= range;
+                distance |= 1;
+              }
+              */
+            }
+            while (--numDirectBits);
+            prob = probs + Align;
+            distance <<= kNumAlignBits;
+            {
+              unsigned i = 1;
+              REV_BIT_CONST(prob, i, 1);
+              REV_BIT_CONST(prob, i, 2);
+              REV_BIT_CONST(prob, i, 4);
+              REV_BIT_LAST (prob, i, 8);
+              distance |= i;
+            }
+            if (distance == (UInt32)0xFFFFFFFF)
+            {
+              len = kMatchSpecLenStart;
+              state -= kNumStates;
+              break;
+            }
+          }
+        }
+        
+        rep3 = rep2;
+        rep2 = rep1;
+        rep1 = rep0;
+        rep0 = distance + 1;
+        state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
+        if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
+        {
+          p->dicPos = dicPos;
+          return SZ_ERROR_DATA;
+        }
+      }
+
+      len += kMatchMinLen;
+
+      {
+        SizeT rem;
+        unsigned curLen;
+        SizeT pos;
+        
+        if ((rem = limit - dicPos) == 0)
+        {
+          p->dicPos = dicPos;
+          return SZ_ERROR_DATA;
+        }
+        
+        curLen = ((rem < len) ? (unsigned)rem : len);
+        pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
+
+        processedPos += (UInt32)curLen;
+
+        len -= curLen;
+        if (curLen <= dicBufSize - pos)
+        {
+          Byte *dest = dic + dicPos;
+          ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
+          const Byte *lim = dest + curLen;
+          dicPos += (SizeT)curLen;
+          do
+            *(dest) = (Byte)*(dest + src);
+          while (++dest != lim);
+        }
+        else
+        {
+          do
+          {
+            dic[dicPos++] = dic[pos];
+            if (++pos == dicBufSize)
+              pos = 0;
+          }
+          while (--curLen != 0);
+        }
+      }
+    }
+  }
+  while (dicPos < limit && buf < bufLimit);
+
+  NORMALIZE;
+  
+  p->buf = buf;
+  p->range = range;
+  p->code = code;
+  p->remainLen = (UInt32)len;
+  p->dicPos = dicPos;
+  p->processedPos = processedPos;
+  p->reps[0] = rep0;
+  p->reps[1] = rep1;
+  p->reps[2] = rep2;
+  p->reps[3] = rep3;
+  p->state = (UInt32)state;
+
+  return SZ_OK;
+}
+#endif
+
+static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
+{ /* Writes the pending tail of the last match (p->remainLen bytes, source distance p->reps[0]) into p->dic, never past `limit`. */
+  if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) /* values >= kMatchSpecLenStart are used as state markers in this file, not as byte counts */
+  {
+    Byte *dic = p->dic;
+    SizeT dicPos = p->dicPos;
+    SizeT dicBufSize = p->dicBufSize;
+    unsigned len = (unsigned)p->remainLen;
+    SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
+    SizeT rem = limit - dicPos;
+    if (rem < len)
+      len = (unsigned)(rem);
+    /* If this copy brings processedPos up to the dictionary size, record that in checkDicSize. */
+    if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
+      p->checkDicSize = p->prop.dicSize;
+
+    p->processedPos += (UInt32)len;
+    p->remainLen -= (UInt32)len;
+    while (len != 0)
+    {
+      len--;
+      dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; /* source is rep0 bytes back; dicBufSize is added when that would fall before the buffer start */
+      dicPos++;
+    }
+    p->dicPos = dicPos;
+  }
+}
+
+
+#define kRange0 0xFFFFFFFF /* same value LzmaDec_DecodeToDic stores in p->range after reading the range-coder header */
+#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) /* presumably the bit-0 bound for kRange0 with a freshly initialized probability - TODO confirm against the bit-decoding macros */
+#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) /* kBound0 plus the same bound taken over the remaining range; LzmaDec_DecodeReal2 rejects codes >= this at position 0 */
+#if kBadRepCode != (0xC0000000 - 0x400) /* compile-time check that the constants above still produce the expected value */
+  #error Stop_Compiling_Bad_LZMA_Check
+#endif
+
+static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{ /* Calls LZMA_DECODE_REAL in passes until dicPos reaches `limit`, p->buf reaches bufLimit, or remainLen becomes >= kMatchSpecLenStart. Returns 0 on success. */
+  do
+  {
+    SizeT limit2 = limit;
+    if (p->checkDicSize == 0)
+    {
+      UInt32 rem = p->prop.dicSize - p->processedPos;
+      if (limit - p->dicPos > rem)
+        limit2 = p->dicPos + rem; /* end this pass where processedPos reaches dicSize */
+      /* Nothing decoded yet: a code value >= kBadRepCode is treated as corrupt data. */
+      if (p->processedPos == 0)
+        if (p->code >= kBadRepCode)
+          return SZ_ERROR_DATA;
+    }
+
+    RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit)); /* RINOK is defined outside this view; presumably returns the result when it is non-zero */
+    
+    if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
+      p->checkDicSize = p->prop.dicSize;
+    
+    LzmaDec_WriteRem(p, limit); /* finish a match that the pass above had to cut at limit2 */
+  }
+  while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
+
+  return 0;
+}
+
+typedef enum /* result of LzmaDec_TryDummy: kind of the next symbol in the input */
+{
+  DUMMY_ERROR, /* unexpected end of input stream */
+  DUMMY_LIT,   /* next symbol is a literal */
+  DUMMY_MATCH, /* next symbol is a match with a newly coded distance */
+  DUMMY_REP    /* next symbol is a match that reuses a previous distance */
+} ELzmaDummy;
+
+static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
+{ /* Dry run: classifies the next symbol from buf[0..inSize) using local copies of range/code/state; *p is never written. */
+  UInt32 range = p->range;
+  UInt32 code = p->code;
+  const Byte *bufLimit = buf + inSize;
+  const CLzmaProb *probs = GET_PROBS;
+  unsigned state = (unsigned)p->state;
+  ELzmaDummy res;
+  /* NOTE(review): the *_CHECK macros are defined outside this view; presumably they return DUMMY_ERROR once buf reaches bufLimit. */
+  {
+    const CLzmaProb *prob;
+    UInt32 bound;
+    unsigned ttt;
+    unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1);
+
+    prob = probs + IsMatch + COMBINED_PS_STATE;
+    IF_BIT_0_CHECK(prob) /* IsMatch bit is 0: the symbol is a literal */
+    {
+      UPDATE_0_CHECK
+
+      /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
+
+      prob = probs + Literal;
+      if (p->checkDicSize != 0 || p->processedPos != 0) /* not the very first byte: pick the literal sub-model from position bits and the previous byte */
+        prob += ((UInt32)LZMA_LIT_SIZE *
+            ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
+            (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
+
+      if (state < kNumLitStates)
+      {
+        unsigned symbol = 1;
+        do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
+      }
+      else
+      {
+        unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
+            (p->dicPos < p->reps[0] ? p->dicBufSize : 0)]; /* byte at distance reps[0], used as extra context */
+        unsigned offs = 0x100;
+        unsigned symbol = 1;
+        do
+        {
+          unsigned bit;
+          const CLzmaProb *probLit;
+          matchByte += matchByte;
+          bit = offs;
+          offs &= matchByte;
+          probLit = prob + (offs + bit + symbol);
+          GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; )
+        }
+        while (symbol < 0x100);
+      }
+      res = DUMMY_LIT;
+    }
+    else
+    {
+      unsigned len;
+      UPDATE_1_CHECK;
+
+      prob = probs + IsRep + state;
+      IF_BIT_0_CHECK(prob) /* IsRep bit is 0: match with a new distance */
+      {
+        UPDATE_0_CHECK;
+        state = 0; /* makes the `state < 4` test below true, so the distance is walked too */
+        prob = probs + LenCoder;
+        res = DUMMY_MATCH;
+      }
+      else
+      {
+        UPDATE_1_CHECK;
+        res = DUMMY_REP;
+        prob = probs + IsRepG0 + state;
+        IF_BIT_0_CHECK(prob)
+        {
+          UPDATE_0_CHECK;
+          prob = probs + IsRep0Long + COMBINED_PS_STATE;
+          IF_BIT_0_CHECK(prob)
+          {
+            UPDATE_0_CHECK;
+            NORMALIZE_CHECK;
+            return DUMMY_REP; /* this rep form has no length field, so the symbol ends here */
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+          }
+        }
+        else
+        {
+          UPDATE_1_CHECK;
+          prob = probs + IsRepG1 + state;
+          IF_BIT_0_CHECK(prob)
+          {
+            UPDATE_0_CHECK;
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+            prob = probs + IsRepG2 + state;
+            IF_BIT_0_CHECK(prob)
+            {
+              UPDATE_0_CHECK;
+            }
+            else
+            {
+              UPDATE_1_CHECK;
+            }
+          }
+        }
+        state = kNumStates; /* makes the `state < 4` test below false: no distance follows a rep match */
+        prob = probs + RepLenCoder;
+      }
+      { /* walk the length field: choose the low, mid or high bit tree, then read its bits */
+        unsigned limit, offset;
+        const CLzmaProb *probLen = prob + LenChoice;
+        IF_BIT_0_CHECK(probLen)
+        {
+          UPDATE_0_CHECK;
+          probLen = prob + LenLow + GET_LEN_STATE;
+          offset = 0;
+          limit = 1 << kLenNumLowBits;
+        }
+        else
+        {
+          UPDATE_1_CHECK;
+          probLen = prob + LenChoice2;
+          IF_BIT_0_CHECK(probLen)
+          {
+            UPDATE_0_CHECK;
+            probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+            offset = kLenNumLowSymbols;
+            limit = 1 << kLenNumLowBits;
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+            probLen = prob + LenHigh;
+            offset = kLenNumLowSymbols * 2;
+            limit = 1 << kLenNumHighBits;
+          }
+        }
+        TREE_DECODE_CHECK(probLen, limit, len);
+        len += offset;
+      }
+
+      if (state < 4) /* only reachable with state == 0, i.e. the new-distance match branch above */
+      {
+        unsigned posSlot;
+        prob = probs + PosSlot +
+            ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
+            kNumPosSlotBits);
+        TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
+        if (posSlot >= kStartPosModelIndex)
+        {
+          unsigned numDirectBits = ((posSlot >> 1) - 1);
+
+          /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
+
+          if (posSlot < kEndPosModelIndex)
+          {
+            prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
+          }
+          else
+          {
+            numDirectBits -= kNumAlignBits;
+            do /* these bits use no probability model: halve the range and adjust code */
+            {
+              NORMALIZE_CHECK
+              range >>= 1;
+              code -= range & (((code - range) >> 31) - 1);
+              /* if (code >= range) code -= range; */
+            }
+            while (--numDirectBits);
+            prob = probs + Align;
+            numDirectBits = kNumAlignBits;
+          }
+          {
+            unsigned i = 1;
+            unsigned m = 1;
+            do
+            {
+              REV_BIT_CHECK(prob, i, m);
+            }
+            while (--numDirectBits);
+          }
+        }
+      }
+    }
+  }
+  NORMALIZE_CHECK;
+  return res;
+}
+
+
+void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
+{
+  /* remainLen above kMatchSpecLenStart is a marker consumed by LzmaDec_DecodeToDic:
+     (kMatchSpecLenStart + 1) makes it re-read only the range-coder header,
+     (kMatchSpecLenStart + 2) additionally resets probabilities, reps and state. */
+  p->remainLen = (initDic || initState) ? kMatchSpecLenStart + 2 : kMatchSpecLenStart + 1;
+  p->tempBufSize = 0;
+
+  if (!initDic)
+    return;
+  /* A dictionary reset also restarts the position bookkeeping. */
+  p->processedPos = 0;
+  p->checkDicSize = 0;
+}
+
+void LzmaDec_Init(CLzmaDec *p)
+{ /* Prepares the decoder for a new stream: output position 0, dictionary and state both reset. */
+  p->dicPos = 0;
+  LzmaDec_InitDicAndState(p, True, True);
+}
+
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
+    ELzmaFinishMode finishMode, ELzmaStatus *status)
+{ /* Decodes from src into p->dic up to dicLimit. In: *srcLen = available input bytes; out: *srcLen = consumed bytes. */
+  SizeT inSize = *srcLen;
+  (*srcLen) = 0;
+  
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+  /* remainLen > kMatchSpecLenStart marks a pending (re)initialization: first gather the RC_INIT_SIZE header bytes. */
+  if (p->remainLen > kMatchSpecLenStart)
+  {
+    for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
+      p->tempBuf[p->tempBufSize++] = *src++;
+    if (p->tempBufSize != 0 && p->tempBuf[0] != 0) /* the first header byte must be zero */
+      return SZ_ERROR_DATA;
+    if (p->tempBufSize < RC_INIT_SIZE)
+    {
+      *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+      return SZ_OK;
+    }
+    p->code =
+        ((UInt32)p->tempBuf[1] << 24)
+      | ((UInt32)p->tempBuf[2] << 16)
+      | ((UInt32)p->tempBuf[3] << 8)
+      | ((UInt32)p->tempBuf[4]);
+    p->range = 0xFFFFFFFF;
+    p->tempBufSize = 0;
+
+    if (p->remainLen > kMatchSpecLenStart + 1) /* full state reset was requested */
+    {
+      SizeT numProbs = LzmaProps_GetNumProbs(&p->prop);
+      SizeT i;
+      CLzmaProb *probs = p->probs;
+      for (i = 0; i < numProbs; i++)
+        probs[i] = kBitModelTotal >> 1;
+      p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
+      p->state = 0;
+    }
+
+    p->remainLen = 0;
+  }
+
+  LzmaDec_WriteRem(p, dicLimit); /* finish a match left pending by an earlier call */
+
+  while (p->remainLen != kMatchSpecLenStart) /* remainLen == kMatchSpecLenStart means the end mark was decoded */
+  {
+      int checkEndMarkNow = 0;
+
+      if (p->dicPos >= dicLimit) /* output limit reached */
+      {
+        if (p->remainLen == 0 && p->code == 0)
+        {
+          *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
+          return SZ_OK;
+        }
+        if (finishMode == LZMA_FINISH_ANY)
+        {
+          *status = LZMA_STATUS_NOT_FINISHED;
+          return SZ_OK;
+        }
+        if (p->remainLen != 0)
+        {
+          *status = LZMA_STATUS_NOT_FINISHED;
+          return SZ_ERROR_DATA;
+        }
+        checkEndMarkNow = 1; /* LZMA_FINISH_END: the only acceptable next symbol is a match (the end mark) */
+      }
+
+      if (p->tempBufSize == 0) /* no carried-over bytes: decode straight from src */
+      {
+        SizeT processed;
+        const Byte *bufLimit;
+        if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+        {
+          int dummyRes = LzmaDec_TryDummy(p, src, inSize);
+          if (dummyRes == DUMMY_ERROR)
+          {
+            memcpy(p->tempBuf, src, inSize); /* keep the incomplete symbol for the next call */
+            p->tempBufSize = (unsigned)inSize;
+            (*srcLen) += inSize;
+            *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+            return SZ_OK;
+          }
+          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+          {
+            *status = LZMA_STATUS_NOT_FINISHED;
+            return SZ_ERROR_DATA;
+          }
+          bufLimit = src; /* p->buf < bufLimit is false immediately, so decoding is bounded to what was just probed */
+        }
+        else
+          bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; /* keep LZMA_REQUIRED_INPUT_MAX bytes of slack past the decoder's stop point */
+        p->buf = src;
+        if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
+          return SZ_ERROR_DATA;
+        processed = (SizeT)(p->buf - src);
+        (*srcLen) += processed;
+        src += processed;
+        inSize -= processed;
+      }
+      else /* bytes were carried over in tempBuf: top it up from src and decode from there */
+      {
+        unsigned rem = p->tempBufSize, lookAhead = 0;
+        while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
+          p->tempBuf[rem++] = src[lookAhead++];
+        p->tempBufSize = rem;
+        if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+        {
+          int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem);
+          if (dummyRes == DUMMY_ERROR)
+          {
+            (*srcLen) += (SizeT)lookAhead;
+            *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+            return SZ_OK;
+          }
+          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+          {
+            *status = LZMA_STATUS_NOT_FINISHED;
+            return SZ_ERROR_DATA;
+          }
+        }
+        p->buf = p->tempBuf;
+        if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
+          return SZ_ERROR_DATA;
+        /* kkk = bytes consumed from tempBuf; rem becomes the unconsumed tail; lookAhead becomes the bytes of src actually used */
+        {
+          unsigned kkk = (unsigned)(p->buf - p->tempBuf);
+          if (rem < kkk)
+            return SZ_ERROR_FAIL; /* some internal error */
+          rem -= kkk;
+          if (lookAhead < rem)
+            return SZ_ERROR_FAIL; /* some internal error */
+          lookAhead -= rem;
+        }
+        (*srcLen) += (SizeT)lookAhead;
+        src += lookAhead;
+        inSize -= (SizeT)lookAhead;
+        p->tempBufSize = 0;
+      }
+  }
+  /* End mark decoded: the stream is only valid if the range-coder code value is zero. */
+  if (p->code != 0)
+    return SZ_ERROR_DATA;
+  *status = LZMA_STATUS_FINISHED_WITH_MARK;
+  return SZ_OK;
+}
+
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{ /* Decodes into the internal dictionary and copies each decoded piece to dest. In: *destLen / *srcLen = capacities; out: bytes written / consumed. */
+  SizeT outSize = *destLen;
+  SizeT inSize = *srcLen;
+  *srcLen = *destLen = 0;
+  for (;;)
+  {
+    SizeT inSizeCur = inSize, outSizeCur, dicPos;
+    ELzmaFinishMode curFinishMode;
+    SRes res;
+    if (p->dicPos == p->dicBufSize) /* dictionary is full: wrap the write position */
+      p->dicPos = 0;
+    dicPos = p->dicPos;
+    if (outSize > p->dicBufSize - dicPos)
+    {
+      outSizeCur = p->dicBufSize; /* more output wanted than fits before the buffer end: decode to the end, no finish check yet */
+      curFinishMode = LZMA_FINISH_ANY;
+    }
+    else
+    {
+      outSizeCur = dicPos + outSize;
+      curFinishMode = finishMode;
+    }
+    /* Up to here outSizeCur is a dictionary limit; after the call it is reused as the number of bytes produced. */
+    res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status);
+    src += inSizeCur;
+    inSize -= inSizeCur;
+    *srcLen += inSizeCur;
+    outSizeCur = p->dicPos - dicPos;
+    memcpy(dest, p->dic + dicPos, outSizeCur);
+    dest += outSizeCur;
+    outSize -= outSizeCur;
+    *destLen += outSizeCur;
+    if (res != 0)
+      return res;
+    if (outSizeCur == 0 || outSize == 0) /* no progress, or dest is full */
+      return SZ_OK;
+  }
+}
+
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc)
+{ /* Releases the probability array through `alloc` and clears the pointer. */
+  ISzAlloc_Free(alloc, p->probs);
+  p->probs = NULL;
+}
+
+static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc)
+{ /* Releases the dictionary buffer through `alloc` and clears the pointer. */
+  ISzAlloc_Free(alloc, p->dic);
+  p->dic = NULL;
+}
+
+void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc)
+{ /* Releases both buffers owned by the decoder: probabilities and dictionary. */
+  LzmaDec_FreeProbs(p, alloc);
+  LzmaDec_FreeDict(p, alloc);
+}
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
+{
+  /* Parses the LZMA properties header: data[0] packs lc/lp/pb as
+     (pb * 5 + lp) * 9 + lc, data[1..4] hold the dictionary size, low byte first. */
+  unsigned packed, lpPb, i;
+  UInt32 dictBytes = 0;
+
+  if (size < LZMA_PROPS_SIZE)
+    return SZ_ERROR_UNSUPPORTED;
+
+  for (i = 4; i >= 1; i--)
+    dictBytes = (dictBytes << 8) | data[i];
+  /* The (clamped) dictionary size is stored even when data[0] is rejected below. */
+  p->dicSize = (dictBytes < LZMA_DIC_MIN) ? LZMA_DIC_MIN : dictBytes;
+
+  packed = data[0];
+  if (packed >= (9 * 5 * 5))
+    return SZ_ERROR_UNSUPPORTED;
+
+  lpPb = packed / 9;
+  p->lc = (Byte)(packed % 9);
+  p->pb = (Byte)(lpPb / 5);
+  p->lp = (Byte)(lpPb % 5);
+  return SZ_OK;
+}
+
+static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc)
+{
+  /* (Re)allocates p->probs for propNew; an existing array of the right size is kept as is. */
+  const UInt32 needed = LzmaProps_GetNumProbs(propNew);
+  if (p->probs && needed == p->numProbs)
+    return SZ_OK;
+  LzmaDec_FreeProbs(p, alloc);
+  p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, needed * sizeof(CLzmaProb));
+  if (!p->probs)
+    return SZ_ERROR_MEM;
+  p->probs_1664 = p->probs + 1664;
+  p->numProbs = needed;
+  return SZ_OK;
+}
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{ /* Decodes the properties header and allocates only the probability array; the dictionary is not touched here. */
+  CLzmaProps propNew;
+  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
+  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
+  p->prop = propNew; /* stored only after both steps succeeded */
+  return SZ_OK;
+}
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{ /* Decodes the properties header, then (re)allocates the probability array and the dictionary buffer. Returns SZ_OK, SZ_ERROR_MEM, or the error from decoding the properties. */
+  CLzmaProps propNew;
+  SizeT dicBufSize;
+  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
+  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
+  /* Round the dictionary size up: 4 KiB steps below 2^22, 1 MiB steps below 2^30, 4 MiB steps above. */
+  {
+    UInt32 dictSize = propNew.dicSize;
+    SizeT mask = ((UInt32)1 << 12) - 1;
+         if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
+    else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;
+    dicBufSize = ((SizeT)dictSize + mask) & ~mask;
+    if (dicBufSize < dictSize) /* the rounding wrapped around SizeT: use the exact size instead */
+      dicBufSize = dictSize;
+  }
+  /* Reuse the current dictionary buffer when it already has the required size. */
+  if (!p->dic || dicBufSize != p->dicBufSize)
+  {
+    LzmaDec_FreeDict(p, alloc);
+    p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize);
+    if (!p->dic)
+    {
+      LzmaDec_FreeProbs(p, alloc); /* on failure the decoder is left with neither buffer */
+      return SZ_ERROR_MEM;
+    }
+  }
+  p->dicBufSize = dicBufSize;
+  p->prop = propNew;
+  return SZ_OK;
+}
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+    ELzmaStatus *status, ISzAllocPtr alloc)
+{ /* One-call decode: uses dest itself as the dictionary, so only the probability array is allocated (and freed before returning). */
+  CLzmaDec p;
+  SRes res;
+  SizeT outSize = *destLen, inSize = *srcLen;
+  *destLen = *srcLen = 0; /* both stay 0 on the early-error returns below */
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+  if (inSize < RC_INIT_SIZE)
+    return SZ_ERROR_INPUT_EOF;
+  LzmaDec_Construct(&p);
+  RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc));
+  p.dic = dest;
+  p.dicBufSize = outSize;
+  LzmaDec_Init(&p);
+  *srcLen = inSize;
+  res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
+  *destLen = p.dicPos;
+  if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) /* there is no later call that could supply more input */
+    res = SZ_ERROR_INPUT_EOF;
+  LzmaDec_FreeProbs(&p, alloc);
+  return res;
+}
diff --git a/libraries/lzma/C/LzmaDec.h b/libraries/lzma/C/LzmaDec.h
new file mode 100644
index 000000000..1f0927ab1
--- /dev/null
+++ b/libraries/lzma/C/LzmaDec.h
@@ -0,0 +1,234 @@
+/* LzmaDec.h -- LZMA Decoder
+2018-04-21 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA_DEC_H
+#define __LZMA_DEC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/* #define _LZMA_PROB32 */
+/* _LZMA_PROB32 can increase the speed on some CPUs,
+   but memory usage for CLzmaDec::probs will be doubled in that case */
+
+typedef
+#ifdef _LZMA_PROB32
+  UInt32
+#else
+  UInt16
+#endif
+  CLzmaProb;
+
+
+/* ---------- LZMA Properties ---------- */
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct _CLzmaProps
+{
+  Byte lc;        /* number of high bits of the previous byte used to select the literal model */
+  Byte lp;        /* number of low bits of the position used to select the literal model */
+  Byte pb;        /* number of low bits of the position used as the position state */
+  Byte _pad_;
+  UInt32 dicSize; /* dictionary size; LzmaProps_Decode raises it to at least LZMA_DIC_MIN */
+} CLzmaProps;
+
+/* LzmaProps_Decode - decodes properties
+Returns:
+  SZ_OK
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
+
+
+/* ---------- LZMA Decoder state ---------- */
+
+/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
+   Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
+
+#define LZMA_REQUIRED_INPUT_MAX 20
+
+typedef struct
+{
+  /* Don't change this structure. ASM code can use it. */
+  CLzmaProps prop;       /* decoded stream properties */
+  CLzmaProb *probs;      /* probability array (allocated by LzmaDec_AllocateProbs / LzmaDec_Allocate) */
+  CLzmaProb *probs_1664; /* set to probs + 1664 whenever probs is allocated */
+  Byte *dic;             /* dictionary buffer that decoded bytes are written to */
+  SizeT dicBufSize;      /* size of dic in bytes */
+  SizeT dicPos;          /* current write position inside dic */
+  const Byte *buf;       /* current read position in the input */
+  UInt32 range;          /* range coder state */
+  UInt32 code;           /* range coder state */
+  UInt32 processedPos;   /* bytes decoded since the last dictionary reset */
+  UInt32 checkDicSize;   /* 0 until processedPos has reached prop.dicSize, then prop.dicSize */
+  UInt32 reps[4];        /* most recent match distances, stored as distance + 1; reps[0] is the latest */
+  UInt32 state;
+  UInt32 remainLen;      /* bytes of the current match still to be written, or a special marker value */
+
+  UInt32 numProbs;       /* number of entries allocated in probs */
+  unsigned tempBufSize;  /* number of valid bytes in tempBuf */
+  Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; /* holds input bytes of a symbol that is split across calls */
+} CLzmaDec;
+
+#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; }
+
+void LzmaDec_Init(CLzmaDec *p);
+
+/* There are two types of LZMA streams:
+     - Stream with end mark. That end mark adds about 6 bytes to compressed size.
+     - Stream without end mark. You must know exact uncompressed size to decompress such stream. */
+
+typedef enum
+{
+  LZMA_FINISH_ANY,   /* finish at any point */
+  LZMA_FINISH_END    /* block must be finished at the end */
+} ELzmaFinishMode;
+
+/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
+
+   You must use LZMA_FINISH_END, when you know that current output buffer
+   covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
+
+   If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
+   and output value of destLen will be less than output buffer size limit.
+   You can check status result also.
+
+   You can use multiple checks to test data integrity after full decompression:
+     1) Check Result and "status" variable.
+     2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
+     3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
+        You must use correct finish mode in that case. */
+
+typedef enum
+{
+  LZMA_STATUS_NOT_SPECIFIED,               /* use main error code instead */
+  LZMA_STATUS_FINISHED_WITH_MARK,          /* stream was finished with end mark. */
+  LZMA_STATUS_NOT_FINISHED,                /* stream was not finished */
+  LZMA_STATUS_NEEDS_MORE_INPUT,            /* you must provide more input bytes */
+  LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK  /* output limit reached and the stream may have finished here without an end mark */
+} ELzmaStatus;
+
+/* ELzmaStatus is used only as output value for function call */
+
+
+/* ---------- Interfaces ---------- */
+
+/* There are 3 levels of interfaces:
+     1) Dictionary Interface
+     2) Buffer Interface
+     3) One Call Interface
+   You can select any of these interfaces, but don't mix functions from different
+   groups for same object. */
+
+
+/* There are two variants to allocate state for Dictionary Interface:
+     1) LzmaDec_Allocate / LzmaDec_Free
+     2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
+   You can use variant 2, if you set dictionary buffer manually.
+   For Buffer Interface you must always use variant 1.
+
+LzmaDec_Allocate* can return:
+  SZ_OK
+  SZ_ERROR_MEM         - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+   
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
+void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);
+
+/* ---------- Dictionary Interface ---------- */
+
+/* You can use it, if you want to eliminate the overhead for data copying from
+   dictionary to some other external buffer.
+   You must work with CLzmaDec variables directly in this interface.
+
+   STEPS:
+     LzmaDec_Construct()
+     LzmaDec_Allocate()
+     for (each new stream)
+     {
+       LzmaDec_Init()
+       while (it needs more decompression)
+       {
+         LzmaDec_DecodeToDic()
+         use data from CLzmaDec::dic and update CLzmaDec::dicPos
+       }
+     }
+     LzmaDec_Free()
+*/
+
+/* LzmaDec_DecodeToDic
+   
+   The decoding to internal dictionary buffer (CLzmaDec::dic).
+   You must manually reset CLzmaDec::dicPos (e.g. to 0, as LzmaDec_DecodeToBuf does) when it reaches CLzmaDec::dicBufSize !!!
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (dicLimit).
+  LZMA_FINISH_ANY - Decode just dicLimit bytes.
+  LZMA_FINISH_END - Stream must be finished after dicLimit.
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_NEEDS_MORE_INPUT
+      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+  SZ_ERROR_DATA - Data error
+*/
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- Buffer Interface ---------- */
+
+/* It's zlib-like interface.
+   See LzmaDec_DecodeToDic description for information about STEPS and return results,
+   but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
+   to work with CLzmaDec variables manually.
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - Decode just destLen bytes.
+  LZMA_FINISH_END - Stream must be finished after (*destLen).
+*/
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- One Call Interface ---------- */
+
+/* LzmaDecode
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - Decode just destLen bytes.
+  LZMA_FINISH_END - Stream must be finished after (*destLen).
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+  SZ_ERROR_DATA - Data error
+  SZ_ERROR_MEM  - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+  SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+*/
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+    ELzmaStatus *status, ISzAllocPtr alloc);
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/C/LzmaEnc.c b/libraries/lzma/C/LzmaEnc.c
new file mode 100644
index 000000000..46a0db000
--- /dev/null
+++ b/libraries/lzma/C/LzmaEnc.c
@@ -0,0 +1,2976 @@
+/* LzmaEnc.c -- LZMA Encoder
+2019-01-10: Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+/* #define SHOW_STAT */
+/* #define SHOW_STAT2 */
+
+#if defined(SHOW_STAT) || defined(SHOW_STAT2)
+#include <stdio.h>
+#endif
+
+#include "LzmaEnc.h"
+
+#include "LzFind.h"
+#ifndef _7ZIP_ST
+#include "LzFindMt.h"
+#endif
+
+#ifdef SHOW_STAT
+static unsigned g_STAT_OFFSET = 0;
+#endif
+
+#define kLzmaMaxHistorySize ((UInt32)3 << 29)
+/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */
+
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+#define kProbInitValue (kBitModelTotal >> 1)
+
+#define kNumMoveReducingBits 4
+#define kNumBitPriceShiftBits 4
+#define kBitPrice (1 << kNumBitPriceShiftBits)
+
+#define REP_LEN_COUNT 64
+
+void LzmaEncProps_Init(CLzmaEncProps *p)
+{ /* Fills *p with defaults; 0 (dictSize, mc) and -1 (the int fields) mean "not set" and are resolved by LzmaEncProps_Normalize. */
+  p->level = 5;
+  p->dictSize = p->mc = 0;
+  p->reduceSize = (UInt64)(Int64)-1; /* all bits set, i.e. the largest UInt64 value */
+  p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
+  p->writeEndMark = 0;
+}
+
+void LzmaEncProps_Normalize(CLzmaEncProps *p)
+{ /* Replaces every "not set" field of *p (negative, or 0 for dictSize/mc) with a concrete default derived from the level. */
+  int level = p->level;
+  if (level < 0) level = 5;
+  p->level = level;
+  /* Default dictionary: 2^(2*level + 14) bytes up to level 5, 2^25 for levels 6-7, 2^26 above. */
+  if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26)));
+  if (p->dictSize > p->reduceSize) /* shrink to the first value of the form 2<<i or 3<<i (i from 11) that is >= reduceSize */
+  {
+    unsigned i;
+    UInt32 reduceSize = (UInt32)p->reduceSize;
+    for (i = 11; i <= 30; i++)
+    {
+      if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; }
+      if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; }
+    }
+  }
+
+  if (p->lc < 0) p->lc = 3;
+  if (p->lp < 0) p->lp = 0;
+  if (p->pb < 0) p->pb = 2;
+
+  if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
+  if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
+  if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
+  if (p->numHashBytes < 0) p->numHashBytes = 4;
+  if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
+  /* Two threads only when built without _7ZIP_ST and both btMode and algo are non-zero. */
+  if (p->numThreads < 0)
+    p->numThreads =
+      #ifndef _7ZIP_ST
+      ((p->btMode && p->algo) ? 2 : 1);
+      #else
+      1;
+      #endif
+}
+
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
+{ /* Returns the dictionary size that normalization would choose, without modifying *props2. */
+  CLzmaEncProps props = *props2; /* work on a copy */
+  LzmaEncProps_Normalize(&props);
+  return props.dictSize;
+}
+
+#if (_MSC_VER >= 1400)
+/* BSR code is fast for some new CPUs */
+/* #define LZMA_LOG_BSR */
+#endif
+
+#ifdef LZMA_LOG_BSR
+
+#define kDicLogSizeMaxCompress 32
+
+#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); }
+
+static unsigned GetPosSlot1(UInt32 pos)
+{ /* BSR variant: slot = 2 * (index of highest set bit) + (the bit just below it). NOTE(review): BSR2_RET shifts by (zz - 1), so pos is presumably expected to be >= 2. */
+  unsigned res;
+  BSR2_RET(pos, res);
+  return res;
+}
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
+
+#else
+
+#define kNumLogBits (9 + sizeof(size_t) / 2)
+/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */
+
+#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
+
+static void LzmaEnc_FastPosInit(Byte *g_FastPos)
+{
+  /* Fills the position -> slot lookup table: entries 0 and 1 map to themselves,
+     then each slot >= 2 occupies a run of 2^((slot / 2) - 1) consecutive entries. */
+  Byte *out = g_FastPos;
+  unsigned slot;
+
+  *out++ = 0;
+  *out++ = 1;
+  for (slot = 2; slot < kNumLogBits * 2; slot++)
+  {
+    const size_t runLen = (size_t)1 << ((slot >> 1) - 1);
+    memset(out, (int)slot, runLen);
+    out += runLen;
+  }
+}
+
+/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */
+/*
+#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
+  (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
+  res = p->g_FastPos[pos >> zz] + (zz * 2); }
+*/
+
+/*
+#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
+  (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \
+  res = p->g_FastPos[pos >> zz] + (zz * 2); }
+*/
+
+#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \
+  res = p->g_FastPos[pos >> zz] + (zz * 2); }
+
+/*
+#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
+  p->g_FastPos[pos >> 6] + 12 : \
+  p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }
+*/
+
+#define GetPosSlot1(pos) p->g_FastPos[pos]
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }
+
+#endif
+
+
+#define LZMA_NUM_REPS 4
+
+typedef UInt16 CState;
+typedef UInt16 CExtra;
+
+typedef struct /* NOTE(review): presumably one node of the encoder's optimal-parsing table - usage is outside this view */
+{
+  UInt32 price;
+  CState state;
+  CExtra extra;
+      // 0   : normal
+      // 1   : LIT : MATCH
+      // > 1 : MATCH (extra-1) : LIT : REP0 (len)
+  UInt32 len;
+  UInt32 dist;
+  UInt32 reps[LZMA_NUM_REPS]; /* one entry per repeat-distance slot */
+} COptimal;
+
+
+// 18.06
+#define kNumOpts (1 << 11)
+#define kPackReserve (kNumOpts * 8)
+// #define kNumOpts (1 << 12)
+// #define kPackReserve (1 + kNumOpts * 2)
+
+#define kNumLenToPosStates 4
+#define kNumPosSlotBits 6
+#define kDicLogSizeMin 0
+#define kDicLogSizeMax 32
+#define kDistTableSizeMax (kDicLogSizeMax * 2)
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+#define kAlignMask (kAlignTableSize - 1)
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+typedef
+#ifdef _LZMA_PROB32
+  UInt32
+#else
+  UInt16
+#endif
+  CLzmaProb;
+
+#define LZMA_PB_MAX 4 /* upper limits checked by LzmaEnc_SetProps() for pb, lc and lp */
+#define LZMA_LC_MAX 8
+#define LZMA_LP_MAX 4
+
+#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) /* 16 position states at most */
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits) /* 8 symbols per low/mid length tree */
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits) /* 256 symbols in the high length tree */
+#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols) /* 8 + 8 + 256 = 272 */
+
+#define LZMA_MATCH_LEN_MIN 2
+#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) /* 2 + 272 - 1 = 273 */
+
+#define kNumStates 12 /* size of the state transition tables and per-state probability arrays */
+
+/* Probability cells of one length coder, as used by LenEnc_Encode():
+ * low[0] and low[kLenNumLowSymbols] are the two choice bits, and each
+ * position state owns a slice of low[] starting at
+ * posState << (kLenNumLowBits + 1) that holds its 3-bit trees. high[] is
+ * the single 8-bit tree for the remaining length symbols. */
+typedef struct
+{
+  CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)];
+  CLzmaProb high[kLenNumHighSymbols];
+} CLenEnc;
+
+/* Cached prices of length symbols, filled by LenPriceEnc_UpdateTables() and
+ * read through GET_PRICE_LEN(). */
+typedef struct
+{
+  unsigned tableSize; /* number of length symbols for which prices are maintained */
+  UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; /* indexed [posState][len - LZMA_MATCH_LEN_MIN] */
+  // UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2];
+  // UInt32 prices2[kLenNumSymbolsTotal];
+} CLenPriceEnc;
+/* Price of coding match length len in position state posState, read from
+ * the CLenPriceEnc that p points to. len must be at least
+ * LZMA_MATCH_LEN_MIN because that value is subtracted to form the index. */
+#define GET_PRICE_LEN(p, posState, len) \
+    ((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN])
+
+/*
+#define GET_PRICE_LEN(p, posState, len) \
+    ((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9)))
+*/
+/* Range coder state together with its output buffer; see the RangeEnc_*
+ * functions for how each field is maintained. */
+typedef struct
+{
+  UInt32 range; /* current range width; RangeEnc_Init() sets it to 0xFFFFFFFF */
+  unsigned cache; /* most recent byte shifted out of low that has not been written yet */
+  UInt64 low; /* low bound of the interval; bit 32 carries into already produced bytes */
+  UInt64 cacheSize; /* count of bytes held back by RangeEnc_ShiftLow() until a carry is resolved */
+  Byte *buf; /* next write position inside the buffer */
+  Byte *bufLim; /* one past the end of the buffer (bufBase + RC_BUF_SIZE) */
+  Byte *bufBase; /* start of the buffer, NULL while not allocated */
+  ISeqOutStream *outStream; /* destination used by RangeEnc_FlushStream() */
+  UInt64 processed; /* bytes already passed to outStream */
+  SRes res; /* SZ_OK, or SZ_ERROR_WRITE after a short write */
+} CRangeEnc;
+
+/* Snapshot of the adaptive coder state. LzmaEnc_SaveState() copies these
+ * members out of CLzmaEnc and LzmaEnc_RestoreState() copies them back; the
+ * member names match the CLzmaEnc members they mirror. */
+typedef struct
+{
+  CLzmaProb *litProbs; /* separate buffer that receives a copy of the literal probabilities */
+
+  unsigned state;
+  UInt32 reps[LZMA_NUM_REPS];
+
+  CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+  CLzmaProb isRep[kNumStates];
+  CLzmaProb isRepG0[kNumStates];
+  CLzmaProb isRepG1[kNumStates];
+  CLzmaProb isRepG2[kNumStates];
+  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+
+  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+  CLzmaProb posEncoders[kNumFullDistances];
+  
+  CLenEnc lenProbs;
+  CLenEnc repLenProbs;
+
+} CSaveState;
+
+
+typedef UInt32 CProbPrice; /* element type of the ProbPrices[] bit-price table */
+
+/* Complete encoder object: match finder binding, parser bookkeeping, range
+ * coder, adaptive probabilities, cached price tables and the optimal-parse
+ * node array. */
+typedef struct
+{
+  void *matchFinderObj; /* object passed as first argument to every matchFinder callback */
+  IMatchFinder matchFinder; /* callbacks: GetNumAvailableBytes, GetMatches, Skip, GetPointerToCurrentPos, ... */
+
+  unsigned optCur; /* next opt[] entry to hand out; set by Backward() */
+  unsigned optEnd; /* one past the last prepared opt[] entry; set by Backward() */
+
+  unsigned longestMatchLen; /* match length saved by GetOptimum() for its next call */
+  unsigned numPairs; /* pair count in matches[] saved together with longestMatchLen */
+  UInt32 numAvail; /* value returned by GetNumAvailableBytes() in ReadMatchDistances() */
+
+  unsigned state; /* current encoder state index (below kNumStates) */
+  unsigned numFastBytes; /* clamped to 5..LZMA_MATCH_LEN_MAX by LzmaEnc_SetProps() */
+  unsigned additionalOffset; /* incremented by ReadMatchDistances(), increased by MOVE_POS() */
+  UInt32 reps[LZMA_NUM_REPS]; /* current repeat distances */
+  unsigned lpMask, pbMask;
+  CLzmaProb *litProbs; /* literal probabilities; (0x300 << lclp) cells are saved/restored */
+  CRangeEnc rc;
+
+  UInt32 backRes; /* dist value of the step chosen by GetOptimum()/Backward() */
+
+  unsigned lc, lp, pb;
+  unsigned lclp;
+
+  BoolInt fastMode; /* set when props.algo == 0 */
+  BoolInt writeEndMark;
+  BoolInt finished;
+  BoolInt multiThread;
+  BoolInt needInit;
+  // BoolInt _maxMode;
+
+  UInt64 nowPos64;
+  
+  unsigned matchPriceCount;
+  // unsigned alignPriceCount;
+  int repLenEncCounter;
+
+  unsigned distTableSize;
+
+  UInt32 dictSize;
+  SRes result;
+
+  #ifndef _7ZIP_ST
+  BoolInt mtMode;
+  // begin of CMatchFinderMt is used in LZ thread
+  CMatchFinderMt matchFinderMt;
+  // end of CMatchFinderMt is used in BT and HASH threads
+  #endif
+
+  CMatchFinder matchFinderBase;
+
+  #ifndef _7ZIP_ST
+  Byte pad[128];
+  #endif
+  
+  // LZ thread
+  CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; /* bit prices, filled by LzmaEnc_InitPriceTables() */
+
+  UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; /* (length, distance) pairs written by matchFinder.GetMatches() */
+
+  UInt32 alignPrices[kAlignTableSize];
+  UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
+  UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
+
+  CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+  CLzmaProb isRep[kNumStates];
+  CLzmaProb isRepG0[kNumStates];
+  CLzmaProb isRepG1[kNumStates];
+  CLzmaProb isRepG2[kNumStates];
+  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+  CLzmaProb posEncoders[kNumFullDistances];
+  
+  CLenEnc lenProbs;
+  CLenEnc repLenProbs;
+
+  #ifndef LZMA_LOG_BSR
+  Byte g_FastPos[1 << kNumLogBits]; /* lookup table read by the GetPosSlot macros */
+  #endif
+
+  CLenPriceEnc lenEnc;
+  CLenPriceEnc repLenEnc;
+
+  COptimal opt[kNumOpts]; /* optimal-parse nodes */
+
+  CSaveState saveState; /* target of LzmaEnc_SaveState(), source of LzmaEnc_RestoreState() */
+
+  #ifndef _7ZIP_ST
+  Byte pad2[128];
+  #endif
+} CLzmaEnc;
+
+
+
+/*
+ * Copies the whole array member `arr` of *src into the member of the same
+ * name in *dest. `arr` has to be a real array in *src, because sizeof is
+ * what supplies the byte count.
+ *
+ * The expansion is a single memcpy() expression: the pointer arguments are
+ * parenthesized and there is no trailing semicolon, so the macro behaves
+ * like an ordinary call (safe in an unbraced if/else, usable with
+ * expression arguments). Existing uses of the form `COPY_ARR(a, b, c);`
+ * are unaffected.
+ */
+#define COPY_ARR(dest, src, arr) memcpy((dest)->arr, (src)->arr, sizeof((src)->arr))
+
+/*
+ * Takes a snapshot of the adaptive coder state of the encoder behind `pp`
+ * and stores it in the encoder's own saveState member: the state index, the
+ * repeat distances, both length coders, every fixed-size probability array
+ * and (0x300 << lclp) literal probabilities.
+ */
+void LzmaEnc_SaveState(CLzmaEncHandle pp)
+{
+  CLzmaEnc *enc = (CLzmaEnc *)pp;
+  CSaveState *snap = &enc->saveState;
+
+  /* scalars and whole-struct members */
+  snap->state = enc->state;
+  snap->lenProbs = enc->lenProbs;
+  snap->repLenProbs = enc->repLenProbs;
+
+  /* fixed-size arrays, sized by the source member */
+  memcpy(snap->reps, enc->reps, sizeof(enc->reps));
+  memcpy(snap->posAlignEncoder, enc->posAlignEncoder, sizeof(enc->posAlignEncoder));
+  memcpy(snap->isRep, enc->isRep, sizeof(enc->isRep));
+  memcpy(snap->isRepG0, enc->isRepG0, sizeof(enc->isRepG0));
+  memcpy(snap->isRepG1, enc->isRepG1, sizeof(enc->isRepG1));
+  memcpy(snap->isRepG2, enc->isRepG2, sizeof(enc->isRepG2));
+  memcpy(snap->isMatch, enc->isMatch, sizeof(enc->isMatch));
+  memcpy(snap->isRep0Long, enc->isRep0Long, sizeof(enc->isRep0Long));
+  memcpy(snap->posSlotEncoder, enc->posSlotEncoder, sizeof(enc->posSlotEncoder));
+  memcpy(snap->posEncoders, enc->posEncoders, sizeof(enc->posEncoders));
+
+  /* literal probabilities live behind a pointer, so the size is computed */
+  memcpy(snap->litProbs, enc->litProbs, ((UInt32)0x300 << enc->lclp) * sizeof(CLzmaProb));
+}
+
+
+/*
+ * Copies the snapshot held in the encoder's saveState member back into the
+ * live encoder behind `pp`. It restores exactly the members that
+ * LzmaEnc_SaveState() stores.
+ */
+void LzmaEnc_RestoreState(CLzmaEncHandle pp)
+{
+  CLzmaEnc *enc = (CLzmaEnc *)pp;
+  const CSaveState *snap = &enc->saveState;
+
+  /* scalars and whole-struct members */
+  enc->state = snap->state;
+  enc->lenProbs = snap->lenProbs;
+  enc->repLenProbs = snap->repLenProbs;
+
+  /* fixed-size arrays, sized by the snapshot member */
+  memcpy(enc->reps, snap->reps, sizeof(snap->reps));
+  memcpy(enc->posAlignEncoder, snap->posAlignEncoder, sizeof(snap->posAlignEncoder));
+  memcpy(enc->isRep, snap->isRep, sizeof(snap->isRep));
+  memcpy(enc->isRepG0, snap->isRepG0, sizeof(snap->isRepG0));
+  memcpy(enc->isRepG1, snap->isRepG1, sizeof(snap->isRepG1));
+  memcpy(enc->isRepG2, snap->isRepG2, sizeof(snap->isRepG2));
+  memcpy(enc->isMatch, snap->isMatch, sizeof(snap->isMatch));
+  memcpy(enc->isRep0Long, snap->isRep0Long, sizeof(snap->isRep0Long));
+  memcpy(enc->posSlotEncoder, snap->posSlotEncoder, sizeof(snap->posSlotEncoder));
+  memcpy(enc->posEncoders, snap->posEncoders, sizeof(snap->posEncoders));
+
+  /* literal probabilities live behind a pointer, so the size is computed */
+  memcpy(enc->litProbs, snap->litProbs, ((UInt32)0x300 << enc->lclp) * sizeof(CLzmaProb));
+}
+
+
+
+/*
+ * Validates a normalized copy of *props2 and stores the settings in the
+ * encoder behind `pp`.
+ *
+ * Returns SZ_ERROR_PARAM, leaving the encoder untouched, when lc, lp or pb
+ * exceeds its LZMA_*_MAX limit or when dictSize exceeds either
+ * 1 << kDicLogSizeMaxCompress or kLzmaMaxHistorySize. Returns SZ_OK
+ * otherwise.
+ */
+SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
+{
+  CLzmaEnc *enc = (CLzmaEnc *)pp;
+  CLzmaEncProps cfg = *props2;
+  unsigned fastBytes;
+  unsigned hashBytes;
+
+  LzmaEncProps_Normalize(&cfg);
+
+  if (cfg.lc > LZMA_LC_MAX || cfg.lp > LZMA_LP_MAX || cfg.pb > LZMA_PB_MAX)
+    return SZ_ERROR_PARAM;
+  if (cfg.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress)
+      || cfg.dictSize > kLzmaMaxHistorySize)
+    return SZ_ERROR_PARAM;
+
+  enc->dictSize = cfg.dictSize;
+
+  /* number of fast bytes is kept inside 5 .. LZMA_MATCH_LEN_MAX */
+  fastBytes = cfg.fb;
+  if (fastBytes < 5)
+    fastBytes = 5;
+  else if (fastBytes > LZMA_MATCH_LEN_MAX)
+    fastBytes = LZMA_MATCH_LEN_MAX;
+  enc->numFastBytes = fastBytes;
+
+  enc->lc = cfg.lc;
+  enc->lp = cfg.lp;
+  enc->pb = cfg.pb;
+  enc->fastMode = (cfg.algo == 0);
+  enc->matchFinderBase.btMode = (Byte)(cfg.btMode ? 1 : 0);
+
+  /* 4 hash bytes unless the binary-tree finder asks for fewer (minimum 2) */
+  hashBytes = 4;
+  if (cfg.btMode && cfg.numHashBytes < 4)
+    hashBytes = (cfg.numHashBytes < 2) ? 2 : cfg.numHashBytes;
+  enc->matchFinderBase.numHashBytes = hashBytes;
+
+  enc->matchFinderBase.cutValue = cfg.mc;
+
+  enc->writeEndMark = cfg.writeEndMark;
+
+  #ifndef _7ZIP_ST
+  enc->multiThread = (cfg.numThreads > 1);
+  #endif
+
+  return SZ_OK;
+}
+
+
+/*
+ * Records the expected input size in the match finder base object of the
+ * encoder behind `pp`.
+ */
+void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
+{
+  ((CLzmaEnc *)pp)->matchFinderBase.expectedDataSize = expectedDataSiize;
+}
+
+/* Named values of the encoder state index, which ranges over
+ * 0 .. kNumStates - 1. */
+#define kState_Start 0
+#define kState_LitAfterMatch 4
+#define kState_LitAfterRep   5
+#define kState_MatchAfterLit 7
+#define kState_RepAfterLit   8
+/* State transition tables, indexed by the current state: the state entered
+ * after coding a literal, a match, a rep match or a short rep. */
+static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4,  5,  6,   4, 5};
+static const Byte kMatchNextStates[kNumStates]   = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
+static const Byte kRepNextStates[kNumStates]     = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
+static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
+
+#define IsLitState(s) ((s) < 7) /* true for states 0..6, the only values kLiteralNextStates produces */
+#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1) /* len below 3 maps to itself, anything else to 3 */
+#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1) /* len 2..4 maps to 0..2, longer lengths to 3 */
+
+#define kInfinityPrice (1 << 30) /* price sentinel that opt[].price values are compared against */
+
+/*
+ * Puts a range coder into its "nothing attached" state: no buffer and no
+ * output stream. RangeEnc_Alloc() relies on bufBase being NULL to know that
+ * a buffer still has to be allocated.
+ */
+static void RangeEnc_Construct(CRangeEnc *p)
+{
+  p->bufBase = NULL;
+  p->outStream = NULL;
+}
+/* Number of output bytes accounted for so far: bytes already flushed
+ * (processed), bytes waiting in the buffer (buf - bufBase) and bytes held
+ * back for carry resolution (cacheSize). The _sizet form does the same sum
+ * in size_t. */
+#define RangeEnc_GetProcessed(p)       ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
+#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
+
+#define RC_BUF_SIZE (1 << 16) /* size in bytes of the buffer allocated by RangeEnc_Alloc() */
+
+/*
+ * Makes sure the range coder owns an output buffer of RC_BUF_SIZE bytes,
+ * allocating it from `alloc` on first use.
+ *
+ * Returns 1 when a buffer is available (newly allocated or already
+ * present) and 0 when the allocation failed.
+ */
+static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc)
+{
+  Byte *mem;
+
+  if (p->bufBase)
+    return 1;
+
+  mem = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE);
+  p->bufBase = mem;
+  if (!mem)
+    return 0;
+
+  p->bufLim = mem + RC_BUF_SIZE;
+  return 1;
+}
+
+/*
+ * Hands the output buffer back to `alloc` and clears bufBase, so that a
+ * later RangeEnc_Alloc() allocates a fresh buffer.
+ */
+static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc)
+{
+  ISzAlloc_Free(alloc, p->bufBase);
+  p->bufBase = NULL;
+}
+
+/*
+ * Resets the range coder for a new stream: full range, empty low/cache,
+ * write cursor at the start of the buffer, nothing processed, no error.
+ * The buffer itself and the output stream binding are left as they are.
+ */
+static void RangeEnc_Init(CRangeEnc *p)
+{
+  /* coder interval */
+  p->low = 0;
+  p->range = 0xFFFFFFFF;
+
+  /* carry bookkeeping */
+  p->cache = 0;
+  p->cacheSize = 0;
+
+  /* output side */
+  p->buf = p->bufBase;
+  p->processed = 0;
+  p->res = SZ_OK;
+}
+
+/*
+ * Writes the buffered bytes [bufBase, buf) to p->outStream and rewinds the
+ * write cursor to the start of the buffer.
+ *
+ * A short write sets p->res to SZ_ERROR_WRITE. Once p->res holds an error
+ * no further writes are attempted, but the cursor is STILL rewound and the
+ * byte count is still added to p->processed. This matters for memory
+ * safety: RangeEnc_ShiftLow() calls this function only when buf has reached
+ * bufLim and then keeps storing bytes through buf. If the cursor were left
+ * at bufLim after an error (as an early return would do), every following
+ * byte would be written past the end of the buffer.
+ */
+MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
+{
+  const size_t num = (size_t)(p->buf - p->bufBase);
+
+  if (p->res == SZ_OK)
+  {
+    if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
+      p->res = SZ_ERROR_WRITE;
+  }
+
+  /* always done, even in the error state, so buf never stays at bufLim */
+  p->processed += num;
+  p->buf = p->bufBase;
+}
+/* Shifts one byte out of the top of p->low. The byte is written only when
+ * it can no longer be altered by a carry; otherwise it is counted in
+ * cacheSize and written later. When bytes are released, the cached byte
+ * goes first (with the carry added), followed by cacheSize bytes of
+ * (Byte)(carry + 0xFF). The buffer is flushed whenever buf reaches
+ * bufLim. */
+MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
+{
+  UInt32 low = (UInt32)p->low;
+  unsigned high = (unsigned)(p->low >> 32); /* carry out of the low 32 bits */
+  p->low = (UInt32)(low << 8); /* keep the remaining 24 bits, moved up by one byte */
+  if (low < (UInt32)0xFF000000 || high != 0) /* top byte is not 0xFF, or a carry is present */
+  {
+    {
+      Byte *buf = p->buf;
+      *buf++ = (Byte)(p->cache + high); /* release the cached byte with the carry applied */
+      p->cache = (unsigned)(low >> 24); /* the byte just shifted out becomes the new cache */
+      p->buf = buf;
+      if (buf == p->bufLim)
+        RangeEnc_FlushStream(p);
+      if (p->cacheSize == 0)
+        return;
+    }
+    high += 0xFF; /* as a Byte: 0xFF without carry, 0x00 with carry */
+    for (;;)
+    {
+      Byte *buf = p->buf;
+      *buf++ = (Byte)(high);
+      p->buf = buf;
+      if (buf == p->bufLim)
+        RangeEnc_FlushStream(p);
+      if (--p->cacheSize == 0)
+        return;
+    }
+  }
+  p->cacheSize++; /* reached only when nothing was written: hold this byte back */
+}
+
+/*
+ * Pushes the remaining coder state out of p->low by calling
+ * RangeEnc_ShiftLow() five times.
+ */
+static void RangeEnc_FlushData(CRangeEnc *p)
+{
+  unsigned remaining = 5;
+  do
+    RangeEnc_ShiftLow(p);
+  while (--remaining != 0);
+}
+/* Range coder bit macros. They operate on locals named range, ttt and
+ * newBound that the calling function has to declare; p is the CRangeEnc.
+ * RC_NORM: when range has dropped below kTopValue, multiply it by 256 and
+ * let RangeEnc_ShiftLow() move one byte out of p->low. */
+#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); }
+/* RC_BIT_PRE: load the probability into ttt and compute newBound, the part
+ * of the range assigned to a 0 bit. */
+#define RC_BIT_PRE(p, prob) \
+  ttt = *(prob); \
+  newBound = (range >> kNumBitModelTotalBits) * ttt;
+
+// #define _LZMA_ENC_USE_BRANCH
+
+#ifdef _LZMA_ENC_USE_BRANCH
+/* RC_BIT, branching form: encode bit against *prob, adapt *prob, then
+ * normalize. */
+#define RC_BIT(p, prob, bit) { \
+  RC_BIT_PRE(p, prob) \
+  if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \
+  else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \
+  *(prob) = (CLzmaProb)ttt; \
+  RC_NORM(p) \
+  }
+
+#else
+/* RC_BIT, branch-free form: the range, low and probability updates are
+ * selected with masks derived from bit instead of an if/else. bit must be
+ * 0 or 1. */
+#define RC_BIT(p, prob, bit) { \
+  UInt32 mask; \
+  RC_BIT_PRE(p, prob) \
+  mask = 0 - (UInt32)bit; \
+  range &= mask; \
+  mask &= newBound; \
+  range -= mask; \
+  (p)->low += mask; \
+  mask = (UInt32)bit - 1; \
+  range += newBound & mask; \
+  mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \
+  mask += ((1 << kNumMoveBits) - 1); \
+  ttt += (Int32)(mask - ttt) >> kNumMoveBits; \
+  *(prob) = (CLzmaProb)ttt; \
+  RC_NORM(p) \
+  }
+
+#endif
+
+
+
+/* Fixed-bit variants, to be used after RC_BIT_PRE: the _BASE forms update
+ * range (and low for a 1 bit) and adapt *prob; RC_BIT_0 and RC_BIT_1 add
+ * the normalization step. */
+#define RC_BIT_0_BASE(p, prob) \
+  range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+
+#define RC_BIT_1_BASE(p, prob) \
+  range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \
+
+#define RC_BIT_0(p, prob) \
+  RC_BIT_0_BASE(p, prob) \
+  RC_NORM(p)
+
+#define RC_BIT_1(p, prob) \
+  RC_BIT_1_BASE(p, prob) \
+  RC_NORM(p)
+/* Encodes a single 0 bit with the adaptive probability *prob and updates
+ * *prob. The locals range, ttt and newBound are the names the RC_* macros
+ * operate on. */
+static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob)
+{
+  UInt32 range, ttt, newBound;
+  range = p->range; /* work on a local copy of the range */
+  RC_BIT_PRE(p, prob)
+  RC_BIT_0(p, prob)
+  p->range = range; /* store the updated range back */
+}
+/* Encodes the low 8 bits of sym, most significant bit first, through the
+ * bit tree stored in probs. Bit 8 of sym is set as a marker; as sym is
+ * shifted left, sym >> 8 is the index of the current tree node (starting
+ * at 1) and the loop stops once the marker reaches bit 16. */
+static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym)
+{
+  UInt32 range = p->range;
+  sym |= 0x100;
+  do
+  {
+    UInt32 ttt, newBound;
+    // RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1);
+    CLzmaProb *prob = probs + (sym >> 8); /* current tree node */
+    UInt32 bit = (sym >> 7) & 1; /* next bit to encode */
+    sym <<= 1;
+    RC_BIT(p, prob, bit);
+  }
+  while (sym < 0x10000);
+  p->range = range;
+}
+/* Encodes the low 8 bits of sym, most significant bit first, using the
+ * corresponding bits of matchByte as extra context. offs starts at 0x100
+ * and keeps that value while the bits of sym and matchByte processed so far
+ * agree; after the first disagreement it becomes 0, which removes the
+ * matchByte term from the probability index for the remaining bits. */
+static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UInt32 matchByte)
+{
+  UInt32 range = p->range;
+  UInt32 offs = 0x100;
+  sym |= 0x100;
+  do
+  {
+    UInt32 ttt, newBound;
+    CLzmaProb *prob;
+    UInt32 bit;
+    matchByte <<= 1; /* moves the next match bit into bit 8 */
+    // RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (sym >> 8)), (sym >> 7) & 1);
+    prob = probs + (offs + (matchByte & offs) + (sym >> 8));
+    bit = (sym >> 7) & 1;
+    sym <<= 1;
+    offs &= ~(matchByte ^ sym); /* cleared as soon as the two bits in position 8 differ */
+    RC_BIT(p, prob, bit);
+  }
+  while (sym < 0x10000);
+  p->range = range;
+}
+
+
+
+/*
+ * Fills ProbPrices[0 .. (kBitModelTotal >> kNumMoveReducingBits) - 1].
+ *
+ * For every entry a representative probability w (the midpoint of the
+ * bucket) is squared kNumBitPriceShiftBits times; after each squaring it is
+ * scaled back below 1 << 16 while the number of halvings is accumulated in
+ * bitCount. The stored price is
+ * (kNumBitModelTotalBits << kNumBitPriceShiftBits) - 15 - bitCount.
+ */
+static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
+{
+  const unsigned kCyclesBits = kNumBitPriceShiftBits;
+  const UInt32 numEntries = (kBitModelTotal >> kNumMoveReducingBits);
+  UInt32 idx;
+
+  for (idx = 0; idx < numEntries; idx++)
+  {
+    UInt32 w = (idx << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1));
+    unsigned bitCount = 0;
+    unsigned round = kCyclesBits;
+
+    while (round-- != 0)
+    {
+      w *= w;
+      bitCount <<= 1;
+      for (; w >= ((UInt32)1 << 16); w >>= 1)
+        bitCount++;
+    }
+
+    ProbPrices[idx] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
+  }
+}
+
+
+/*
+ * Bit-price lookups. The price of coding `bit` with probability `prob` is
+ * read from a ProbPrices table at index
+ *   (prob, or prob ^ (kBitModelTotal - 1) when bit is 1) >> kNumMoveReducingBits.
+ *
+ * GET_PRICE / GET_PRICE_0 / GET_PRICE_1 read p->ProbPrices and therefore
+ * need an encoder pointer named `p` in scope; the GET_PRICEa* forms read a
+ * local or parameter named `ProbPrices`.
+ *
+ * Every macro expands to a plain expression. GET_PRICE and GET_PRICEa
+ * deliberately carry no trailing semicolon, so they can be used inside
+ * larger expressions exactly like the _0/_1 forms; statement uses such as
+ * `price += GET_PRICEa(x, b);` are unaffected.
+ */
+#define GET_PRICE(prob, bit) \
+  p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
+
+#define GET_PRICEa(prob, bit) \
+     ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
+
+#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
+#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
+
+/*
+ * Returns the price of coding the low 8 bits of sym through the bit tree
+ * in probs. The symbol gets a marker bit at position 8 and is then walked
+ * from the leaf towards the root: each step strips the lowest bit and adds
+ * the price of that bit at the parent node, until the root (index 1) has
+ * been used.
+ */
+static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices)
+{
+  UInt32 total = 0;
+  UInt32 node = sym | 0x100;
+
+  for (;;)
+  {
+    const unsigned bit = node & 1;
+    node >>= 1;
+    total += GET_PRICEa(probs[node], bit);
+    if (node < 2)
+      break;
+  }
+
+  return total;
+}
+
+
+/*
+ * Returns the price of coding the low 8 bits of sym, most significant bit
+ * first, with matchByte as additional context. It visits the same
+ * probability cells that LitEnc_EncodeMatched() uses: the context mask
+ * starts at 0x100 and drops to 0 once a bit of sym differs from the
+ * matching bit of matchByte.
+ */
+static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices)
+{
+  UInt32 total = 0;
+  UInt32 ctxMask = 0x100;
+
+  sym |= 0x100;
+  for (;;)
+  {
+    const CLzmaProb *cell;
+    unsigned bit;
+
+    matchByte <<= 1;
+    cell = probs + (ctxMask + (matchByte & ctxMask) + (sym >> 8));
+    bit = (sym >> 7) & 1;
+    total += GET_PRICEa(*cell, bit);
+
+    sym <<= 1;
+    ctxMask &= ~(matchByte ^ sym);
+    if (sym >= 0x10000)
+      break;
+  }
+
+  return total;
+}
+
+/* Encodes the numBits low bits of sym, least significant bit first,
+ * through the bit tree in probs. m is the index of the current tree node
+ * and starts at 1. numBits must be at least 1: the loop body runs before
+ * the counter is tested. */
+static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, unsigned sym)
+{
+  UInt32 range = rc->range;
+  unsigned m = 1;
+  do
+  {
+    UInt32 ttt, newBound;
+    unsigned bit = sym & 1;
+    // RangeEnc_EncodeBit(rc, probs + m, bit);
+    sym >>= 1;
+    RC_BIT(rc, probs + m, bit);
+    m = (m << 1) | bit; /* descend to the child selected by bit */
+  }
+  while (--numBits);
+  rc->range = range;
+}
+
+
+
+/*
+ * Sets every probability cell of a length coder (all of low[] and all of
+ * high[]) to kProbInitValue.
+ */
+static void LenEnc_Init(CLenEnc *p)
+{
+  CLzmaProb *cell;
+  CLzmaProb *stop;
+
+  cell = p->low;
+  stop = cell + (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1));
+  while (cell != stop)
+    *cell++ = kProbInitValue;
+
+  cell = p->high;
+  stop = cell + kLenNumHighSymbols;
+  while (cell != stop)
+    *cell++ = kProbInitValue;
+}
+/* Encodes the length symbol sym (match length minus LZMA_MATCH_LEN_MIN;
+ * see GET_PRICE_LEN) for position state posState.
+ *   sym below 8       : choice bit 0, then 3 bits from the posState slice
+ *   sym from 8 to 15  : choice bits 1,0, then 3 bits of sym - 8 from the
+ *                       second half of the posState slice
+ *   sym 16 and above  : choice bits 1,1, then sym - 16 as 8 bits through
+ *                       p->high
+ */
+static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState)
+{
+  UInt32 range, ttt, newBound;
+  CLzmaProb *probs = p->low;
+  range = rc->range;
+  RC_BIT_PRE(rc, probs); /* first choice bit uses low[0] */
+  if (sym >= kLenNumLowSymbols)
+  {
+    RC_BIT_1(rc, probs);
+    probs += kLenNumLowSymbols; /* second choice bit uses low[kLenNumLowSymbols] */
+    RC_BIT_PRE(rc, probs);
+    if (sym >= kLenNumLowSymbols * 2)
+    {
+      RC_BIT_1(rc, probs);
+      rc->range = range; /* LitEnc_Encode() reloads the range from rc */
+      // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2);
+      LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2);
+      return;
+    }
+    sym -= kLenNumLowSymbols;
+  }
+
+  // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym);
+  {
+    unsigned m;
+    unsigned bit;
+    RC_BIT_0(rc, probs); /* finishes the choice bit prepared by the last RC_BIT_PRE */
+    probs += (posState << (1 + kLenNumLowBits)); /* move to the slice of this position state */
+    bit = (sym >> 2)    ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit;
+    bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit;
+    bit =  sym       & 1; RC_BIT(rc, probs + m, bit);
+    rc->range = range;
+  }
+}
+
+/*
+ * Writes the prices of all 8 symbols of a 3-bit tree into prices[0..7].
+ * The tree uses probs[1] as root, probs[2..3] for the second level and
+ * probs[4..7] for the last level; startPrice is added to every entry.
+ * Symbols are handled in pairs that share the two upper bits, so the
+ * common prefix price is computed once per pair.
+ */
+static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices)
+{
+  unsigned pair;
+
+  for (pair = 0; pair < 4; pair++)
+  {
+    const unsigned topBit = pair >> 1;
+    const unsigned midBit = pair & 1;
+    const UInt32 leaf = probs[4 + pair];
+    UInt32 prefix = startPrice;
+
+    prefix += GET_PRICEa(probs[1], topBit);
+    prefix += GET_PRICEa(probs[2 + topBit], midBit);
+
+    prices[pair * 2] = prefix + GET_PRICEa_0(leaf);
+    prices[pair * 2 + 1] = prefix + GET_PRICEa_1(leaf);
+  }
+}
+
+/* Recomputes the cached length prices in p from the probabilities in enc.
+ * For each of the numPosStates position states the 16 low/mid symbols are
+ * priced from that state's slice of enc->low. When p->tableSize exceeds
+ * 16, the high symbols are priced once from enc->high into
+ * p->prices[0] and then copied to the other position states. */
+MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
+    CLenPriceEnc *p,
+    unsigned numPosStates,
+    const CLenEnc *enc,
+    const CProbPrice *ProbPrices)
+{
+  UInt32 b;
+ 
+  {
+    unsigned prob = enc->low[0];
+    UInt32 a, c;
+    unsigned posState;
+    b = GET_PRICEa_1(prob); /* first choice bit = 1 */
+    a = GET_PRICEa_0(prob); /* first choice bit = 0: prefix of symbols 0..7 */
+    c = b + GET_PRICEa_0(enc->low[kLenNumLowSymbols]); /* choice bits 1,0: prefix of symbols 8..15 */
+    for (posState = 0; posState < numPosStates; posState++)
+    {
+      UInt32 *prices = p->prices[posState];
+      const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits));
+      SetPrices_3(probs, a, prices, ProbPrices);
+      SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices);
+    }
+  }
+
+  /*
+  {
+    unsigned i;
+    UInt32 b;
+    a = GET_PRICEa_0(enc->low[0]);
+    for (i = 0; i < kLenNumLowSymbols; i++)
+      p->prices2[i] = a;
+    a = GET_PRICEa_1(enc->low[0]);
+    b = a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);
+    for (i = kLenNumLowSymbols; i < kLenNumLowSymbols * 2; i++)
+      p->prices2[i] = b;
+    a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);
+  }
+  */
+ 
+  // p->counter = numSymbols;
+  // p->counter = 64;
+
+  {
+    unsigned i = p->tableSize;
+    
+    if (i > kLenNumLowSymbols * 2)
+    {
+      const CLzmaProb *probs = enc->high;
+      UInt32 *prices = p->prices[0] + kLenNumLowSymbols * 2;
+      i -= kLenNumLowSymbols * 2 - 1;
+      i >>= 1; /* number of symbol pairs to price, rounded up */
+      b += GET_PRICEa_1(enc->low[kLenNumLowSymbols]); /* choice bits 1,1: prefix of the high symbols */
+      do
+      {
+        /*
+        p->prices2[i] = a +
+        // RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices);
+        LitEnc_GetPrice(probs, i - kLenNumLowSymbols * 2, ProbPrices);
+        */
+        // UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices);
+        unsigned sym = --i + (1 << (kLenNumHighBits - 1)); /* tree node shared by the pair 2*i and 2*i+1 */
+        UInt32 price = b;
+        do
+        {
+          unsigned bit = sym & 1;
+          sym >>= 1;
+          price += GET_PRICEa(probs[sym], bit);
+        }
+        while (sym >= 2); /* walk from that node up to the root */
+
+        {
+          unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
+          prices[(size_t)i * 2    ] = price + GET_PRICEa_0(prob);
+          prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob);
+        }
+      }
+      while (i);
+
+      {
+        unsigned posState;
+        size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
+        for (posState = 1; posState < numPosStates; posState++)
+          memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num);
+      }
+    }
+  }
+}
+
+/*
+  #ifdef SHOW_STAT
+  g_STAT_OFFSET += num;
+  printf("\n MovePos %u", num);
+  #endif
+*/
+/* Adds num to p->additionalOffset and asks the match finder to skip num
+ * positions. num is evaluated twice. */
+#define MOVE_POS(p, num) { \
+    p->additionalOffset += (num); \
+    p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); }
+
+
+/*
+ * Advances the match finder by one position and collects its matches into
+ * p->matches as (length, distance) pairs.
+ *
+ * Side effects: increments p->additionalOffset, refreshes p->numAvail and
+ * stores the number of UInt32 values written to p->matches in *numPairsRes.
+ *
+ * Returns 0 when there is no match, otherwise the length of the last
+ * (longest) match. When that length equals p->numFastBytes the match is
+ * extended by direct byte comparison, up to the available bytes capped at
+ * LZMA_MATCH_LEN_MAX, and the extended length is returned.
+ */
+static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
+{
+  unsigned pairCount;
+  unsigned bestLen;
+  UInt32 avail;
+  const Byte *cur;
+  const Byte *scan;
+  const Byte *end;
+  ptrdiff_t back;
+
+  p->additionalOffset++;
+  p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+  pairCount = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
+  *numPairsRes = pairCount;
+
+  #ifdef SHOW_STAT
+  printf("\n i = %u numPairs = %u    ", g_STAT_OFFSET, pairCount / 2);
+  g_STAT_OFFSET++;
+  {
+    unsigned k;
+    for (k = 0; k < pairCount; k += 2)
+      printf("%2u %6u   | ", p->matches[k], p->matches[k + 1]);
+  }
+  #endif
+
+  if (pairCount == 0)
+    return 0;
+
+  bestLen = p->matches[(size_t)pairCount - 2];
+  if (bestLen != p->numFastBytes)
+    return bestLen;
+
+  /* the finder stopped at numFastBytes: try to extend the match by hand */
+  avail = p->numAvail;
+  if (avail > LZMA_MATCH_LEN_MAX)
+    avail = LZMA_MATCH_LEN_MAX;
+
+  cur = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+  back = (ptrdiff_t)-1 - p->matches[(size_t)pairCount - 1];
+  end = cur + avail;
+  scan = cur + bestLen;
+  while (scan != end && *scan == scan[back])
+    scan++;
+
+  return (unsigned)(scan - cur);
+}
+/* MARK_LIT is the all-ones UInt32 value that marks a literal in the dist
+ * field of a COptimal node and in p->backRes. */
+#define MARK_LIT ((UInt32)(Int32)-1)
+
+#define MakeAs_Lit(p)       { (p)->dist = MARK_LIT; (p)->extra = 0; } /* tag node p as a literal step */
+#define MakeAs_ShortRep(p)  { (p)->dist = 0; (p)->extra = 0; } /* tag node p as a short rep (dist 0) */
+#define IsShortRep(p)       ((p)->dist == 0) /* callers check len == 1 separately */
+
+/* Sum of the 0-bit prices of isRepG0[state] and isRep0Long[state][posState];
+ * callers add it to the match + rep prefix price to price a short rep. */
+#define GetPrice_ShortRep(p, state, posState) \
+  ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState]))
+
+/*
+ * Price of the flag bits that select a rep0 match of length above 1 in the
+ * given state and position state: isMatch = 1, isRep0Long = 1, isRep = 1,
+ * isRepG0 = 0.
+ *
+ * The whole sum is enclosed in one pair of parentheses, so the macro acts
+ * as a single operand whatever operators surround it. The value of the sum
+ * itself is unchanged. Like the GET_PRICE_* macros it relies on, it needs
+ * an encoder pointer named `p` in scope.
+ */
+#define GetPrice_Rep_0(p, state, posState) ( \
+    GET_PRICE_1(p->isMatch[state][posState]) \
+  + GET_PRICE_1(p->isRep0Long[state][posState]) \
+  + GET_PRICE_1(p->isRep[state]) \
+  + GET_PRICE_0(p->isRepG0[state]))
+  
+/*
+ * Returns the price of the bits that select repeat distance repIndex
+ * (0 .. LZMA_NUM_REPS - 1) in the given state, not counting the isMatch
+ * and isRep bits or the length:
+ *   rep0: isRepG0 = 0, isRep0Long = 1
+ *   rep1: isRepG0 = 1, isRepG1 = 0
+ *   rep2: isRepG0 = 1, isRepG1 = 1, isRepG2 = 0
+ *   rep3: isRepG0 = 1, isRepG1 = 1, isRepG2 = 1
+ */
+MY_FORCE_INLINE
+static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState)
+{
+  const UInt32 g0 = p->isRepG0[state];
+  UInt32 g1;
+  UInt32 total;
+
+  if (repIndex == 0)
+    return GET_PRICE_0(g0) + GET_PRICE_1(p->isRep0Long[state][posState]);
+
+  g1 = p->isRepG1[state];
+  total = GET_PRICE_1(g0);
+  if (repIndex == 1)
+    return total + GET_PRICE_0(g1);
+
+  total += GET_PRICE_1(g1);
+  total += GET_PRICE(p->isRepG2[state], repIndex - 2);
+  return total;
+}
+
+/* Walks the chain of optimal-parse nodes backwards from node cur to node 0
+ * and rewrites p->opt[] so that the chosen steps are stored in forward
+ * order just below index cur + 1. Sets p->optEnd to cur + 1, p->optCur to
+ * the index of the first stored step after the returned one, and
+ * p->backRes to the dist value of the first step. Returns the length of
+ * that first step. */
+static unsigned Backward(CLzmaEnc *p, unsigned cur)
+{
+  unsigned wr = cur + 1; /* write index, moves downwards */
+  p->optEnd = wr;
+
+  for (;;)
+  {
+    UInt32 dist = p->opt[cur].dist;
+    unsigned len = (unsigned)p->opt[cur].len;
+    unsigned extra = (unsigned)p->opt[cur].extra;
+    cur -= len;
+
+    if (extra) /* a combined node expands into several steps */
+    {
+      wr--;
+      p->opt[wr].len = (UInt32)len;
+      cur -= extra;
+      len = extra;
+      if (extra == 1) /* LIT : MATCH */
+      {
+        p->opt[wr].dist = dist;
+        dist = MARK_LIT;
+      }
+      else /* MATCH (extra-1) : LIT : REP0 (len) */
+      {
+        p->opt[wr].dist = 0;
+        len--;
+        wr--;
+        p->opt[wr].dist = MARK_LIT;
+        p->opt[wr].len = 1;
+      }
+    }
+
+    if (cur == 0) /* reached the start: this step is the one to return */
+    {
+      p->backRes = dist;
+      p->optCur = wr;
+      return len;
+    }
+    
+    wr--;
+    p->opt[wr].dist = dist;
+    p->opt[wr].len = (UInt32)len;
+  }
+}
+
+
+/* Pointer into p->litProbs for the literal context formed from pos and
+ * prevByte: ((pos << 8) + prevByte) is masked with p->lpMask, shifted left
+ * by p->lc and multiplied by 3 to give the cell offset. Needs an encoder
+ * pointer named p in scope. */
+#define LIT_PROBS(pos, prevByte) \
+  (p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & p->lpMask) << p->lc))
+
+
+static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
+{
+  unsigned last, cur;
+  UInt32 reps[LZMA_NUM_REPS];
+  unsigned repLens[LZMA_NUM_REPS];
+  UInt32 *matches;
+
+  {
+    UInt32 numAvail;
+    unsigned numPairs, mainLen, repMaxIndex, i, posState;
+    UInt32 matchPrice, repMatchPrice;
+    const Byte *data;
+    Byte curByte, matchByte;
+    
+    p->optCur = p->optEnd = 0;
+    
+    if (p->additionalOffset == 0)
+      mainLen = ReadMatchDistances(p, &numPairs);
+    else
+    {
+      mainLen = p->longestMatchLen;
+      numPairs = p->numPairs;
+    }
+    
+    numAvail = p->numAvail;
+    if (numAvail < 2)
+    {
+      p->backRes = MARK_LIT;
+      return 1;
+    }
+    if (numAvail > LZMA_MATCH_LEN_MAX)
+      numAvail = LZMA_MATCH_LEN_MAX;
+    
+    data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+    repMaxIndex = 0;
+    
+    for (i = 0; i < LZMA_NUM_REPS; i++)
+    {
+      unsigned len;
+      const Byte *data2;
+      reps[i] = p->reps[i];
+      data2 = data - reps[i];
+      if (data[0] != data2[0] || data[1] != data2[1])
+      {
+        repLens[i] = 0;
+        continue;
+      }
+      for (len = 2; len < numAvail && data[len] == data2[len]; len++)
+      {}
+      repLens[i] = len;
+      if (len > repLens[repMaxIndex])
+        repMaxIndex = i;
+    }
+    
+    if (repLens[repMaxIndex] >= p->numFastBytes)
+    {
+      unsigned len;
+      p->backRes = (UInt32)repMaxIndex;
+      len = repLens[repMaxIndex];
+      MOVE_POS(p, len - 1)
+      return len;
+    }
+    
+    matches = p->matches;
+    
+    if (mainLen >= p->numFastBytes)
+    {
+      p->backRes = matches[(size_t)numPairs - 1] + LZMA_NUM_REPS;
+      MOVE_POS(p, mainLen - 1)
+      return mainLen;
+    }
+    
+    curByte = *data;
+    matchByte = *(data - reps[0]);
+
+    last = repLens[repMaxIndex];
+    if (last <= mainLen)
+      last = mainLen;
+    
+    if (last < 2 && curByte != matchByte)
+    {
+      p->backRes = MARK_LIT;
+      return 1;
+    }
+    
+    p->opt[0].state = (CState)p->state;
+    
+    posState = (position & p->pbMask);
+    
+    {
+      const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
+      p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) +
+        (!IsLitState(p->state) ?
+          LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :
+          LitEnc_GetPrice(probs, curByte, p->ProbPrices));
+    }
+
+    MakeAs_Lit(&p->opt[1]);
+    
+    matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
+    repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
+    
+    // 18.06
+    if (matchByte == curByte && repLens[0] == 0)
+    {
+      UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState);
+      if (shortRepPrice < p->opt[1].price)
+      {
+        p->opt[1].price = shortRepPrice;
+        MakeAs_ShortRep(&p->opt[1]);
+      }
+      if (last < 2)
+      {
+        p->backRes = p->opt[1].dist;
+        return 1;
+      }
+    }
+   
+    p->opt[1].len = 1;
+    
+    p->opt[0].reps[0] = reps[0];
+    p->opt[0].reps[1] = reps[1];
+    p->opt[0].reps[2] = reps[2];
+    p->opt[0].reps[3] = reps[3];
+    
+    // ---------- REP ----------
+    
+    for (i = 0; i < LZMA_NUM_REPS; i++)
+    {
+      unsigned repLen = repLens[i];
+      UInt32 price;
+      if (repLen < 2)
+        continue;
+      price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState);
+      do
+      {
+        UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, repLen);
+        COptimal *opt = &p->opt[repLen];
+        if (price2 < opt->price)
+        {
+          opt->price = price2;
+          opt->len = (UInt32)repLen;
+          opt->dist = (UInt32)i;
+          opt->extra = 0;
+        }
+      }
+      while (--repLen >= 2);
+    }
+    
+    
+    // ---------- MATCH ----------
+    {
+      unsigned len = repLens[0] + 1;
+      if (len <= mainLen)
+      {
+        unsigned offs = 0;
+        UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);
+
+        if (len < 2)
+          len = 2;
+        else
+          while (len > matches[offs])
+            offs += 2;
+    
+        for (; ; len++)
+        {
+          COptimal *opt;
+          UInt32 dist = matches[(size_t)offs + 1];
+          UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
+          unsigned lenToPosState = GetLenToPosState(len);
+       
+          if (dist < kNumFullDistances)
+            price += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)];
+          else
+          {
+            unsigned slot;
+            GetPosSlot2(dist, slot);
+            price += p->alignPrices[dist & kAlignMask];
+            price += p->posSlotPrices[lenToPosState][slot];
+          }
+          
+          opt = &p->opt[len];
+          
+          if (price < opt->price)
+          {
+            opt->price = price;
+            opt->len = (UInt32)len;
+            opt->dist = dist + LZMA_NUM_REPS;
+            opt->extra = 0;
+          }
+          
+          if (len == matches[offs])
+          {
+            offs += 2;
+            if (offs == numPairs)
+              break;
+          }
+        }
+      }
+    }
+    
+
+    cur = 0;
+
+    #ifdef SHOW_STAT2
+    /* if (position >= 0) */
+    {
+      unsigned i;
+      printf("\n pos = %4X", position);
+      for (i = cur; i <= last; i++)
+      printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price);
+    }
+    #endif
+  }
+
+
+  
+  // ---------- Optimal Parsing ----------
+
+  for (;;)
+  {
+    unsigned numAvail;
+    UInt32 numAvailFull;
+    unsigned newLen, numPairs, prev, state, posState, startLen;
+    UInt32 litPrice, matchPrice, repMatchPrice;
+    BoolInt nextIsLit;
+    Byte curByte, matchByte;
+    const Byte *data;
+    COptimal *curOpt, *nextOpt;
+
+    if (++cur == last)
+      break;
+    
+    // 18.06
+    if (cur >= kNumOpts - 64)
+    {
+      unsigned j, best;
+      UInt32 price = p->opt[cur].price;
+      best = cur;
+      for (j = cur + 1; j <= last; j++)
+      {
+        UInt32 price2 = p->opt[j].price;
+        if (price >= price2)
+        {
+          price = price2;
+          best = j;
+        }
+      }
+      {
+        unsigned delta = best - cur;
+        if (delta != 0)
+        {
+          MOVE_POS(p, delta);
+        }
+      }
+      cur = best;
+      break;
+    }
+
+    newLen = ReadMatchDistances(p, &numPairs);
+    
+    if (newLen >= p->numFastBytes)
+    {
+      p->numPairs = numPairs;
+      p->longestMatchLen = newLen;
+      break;
+    }
+    
+    curOpt = &p->opt[cur];
+
+    position++;
+
+    // we need that check here, if skip_items in p->opt are possible
+    /*
+    if (curOpt->price >= kInfinityPrice)
+      continue;
+    */
+
+    prev = cur - curOpt->len;
+
+    if (curOpt->len == 1)
+    {
+      state = (unsigned)p->opt[prev].state;
+      if (IsShortRep(curOpt))
+        state = kShortRepNextStates[state];
+      else
+        state = kLiteralNextStates[state];
+    }
+    else
+    {
+      const COptimal *prevOpt;
+      UInt32 b0;
+      UInt32 dist = curOpt->dist;
+
+      if (curOpt->extra)
+      {
+        prev -= (unsigned)curOpt->extra;
+        state = kState_RepAfterLit;
+        if (curOpt->extra == 1)
+          state = (dist < LZMA_NUM_REPS ? kState_RepAfterLit : kState_MatchAfterLit);
+      }
+      else
+      {
+        state = (unsigned)p->opt[prev].state;
+        if (dist < LZMA_NUM_REPS)
+          state = kRepNextStates[state];
+        else
+          state = kMatchNextStates[state];
+      }
+
+      prevOpt = &p->opt[prev];
+      b0 = prevOpt->reps[0];
+
+      if (dist < LZMA_NUM_REPS)
+      {
+        if (dist == 0)
+        {
+          reps[0] = b0;
+          reps[1] = prevOpt->reps[1];
+          reps[2] = prevOpt->reps[2];
+          reps[3] = prevOpt->reps[3];
+        }
+        else
+        {
+          reps[1] = b0;
+          b0 = prevOpt->reps[1];
+          if (dist == 1)
+          {
+            reps[0] = b0;
+            reps[2] = prevOpt->reps[2];
+            reps[3] = prevOpt->reps[3];
+          }
+          else
+          {
+            reps[2] = b0;
+            reps[0] = prevOpt->reps[dist];
+            reps[3] = prevOpt->reps[dist ^ 1];
+          }
+        }
+      }
+      else
+      {
+        reps[0] = (dist - LZMA_NUM_REPS + 1);
+        reps[1] = b0;
+        reps[2] = prevOpt->reps[1];
+        reps[3] = prevOpt->reps[2];
+      }
+    }
+    
+    curOpt->state = (CState)state;
+    curOpt->reps[0] = reps[0];
+    curOpt->reps[1] = reps[1];
+    curOpt->reps[2] = reps[2];
+    curOpt->reps[3] = reps[3];
+
+    data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+    curByte = *data;
+    matchByte = *(data - reps[0]);
+
+    posState = (position & p->pbMask);
+
+    /*
+    The order of Price checks:
+       <  LIT
+       <= SHORT_REP
+       <  LIT : REP_0
+       <  REP    [ : LIT : REP_0 ]
+       <  MATCH  [ : LIT : REP_0 ]
+    */
+
+    {
+      UInt32 curPrice = curOpt->price;
+      unsigned prob = p->isMatch[state][posState];
+      matchPrice = curPrice + GET_PRICE_1(prob);
+      litPrice = curPrice + GET_PRICE_0(prob);
+    }
+
+    nextOpt = &p->opt[(size_t)cur + 1];
+    nextIsLit = False;
+
+    // here we can allow skip_items in p->opt, if we don't check (nextOpt->price < kInfinityPrice)
+    // 18.new.06
+    if ((nextOpt->price < kInfinityPrice
+        // && !IsLitState(state)
+        && matchByte == curByte)
+        || litPrice > nextOpt->price
+        )
+      litPrice = 0;
+    else
+    {
+      const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
+      litPrice += (!IsLitState(state) ?
+          LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) :
+          LitEnc_GetPrice(probs, curByte, p->ProbPrices));
+      
+      if (litPrice < nextOpt->price)
+      {
+        nextOpt->price = litPrice;
+        nextOpt->len = 1;
+        MakeAs_Lit(nextOpt);
+        nextIsLit = True;
+      }
+    }
+
+    repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);
+    
+    numAvailFull = p->numAvail;
+    {
+      unsigned temp = kNumOpts - 1 - cur;
+      if (numAvailFull > temp)
+        numAvailFull = (UInt32)temp;
+    }
+
+    // 18.06
+    // ---------- SHORT_REP ----------
+    if (IsLitState(state)) // 18.new
+    if (matchByte == curByte)
+    if (repMatchPrice < nextOpt->price) // 18.new
+    // if (numAvailFull < 2 || data[1] != *(data - reps[0] + 1))
+    if (
+        // nextOpt->price >= kInfinityPrice ||
+        nextOpt->len < 2   // we can check nextOpt->len, if skip items are not allowed in p->opt
+        || (nextOpt->dist != 0
+            // && nextOpt->extra <= 1 // 17.old
+            )
+        )
+    {
+      UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState);
+      // if (shortRepPrice <= nextOpt->price) // 17.old
+      if (shortRepPrice < nextOpt->price)  // 18.new
+      {
+        nextOpt->price = shortRepPrice;
+        nextOpt->len = 1;
+        MakeAs_ShortRep(nextOpt);
+        nextIsLit = False;
+      }
+    }
+    
+    if (numAvailFull < 2)
+      continue;
+    numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes);
+
+    // numAvail <= p->numFastBytes
+
+    // ---------- LIT : REP_0 ----------
+
+    if (!nextIsLit
+        && litPrice != 0 // 18.new
+        && matchByte != curByte
+        && numAvailFull > 2)
+    {
+      const Byte *data2 = data - reps[0];
+      if (data[1] == data2[1] && data[2] == data2[2])
+      {
+        unsigned len;
+        unsigned limit = p->numFastBytes + 1;
+        if (limit > numAvailFull)
+          limit = numAvailFull;
+        for (len = 3; len < limit && data[len] == data2[len]; len++)
+        {}
+        
+        {
+          unsigned state2 = kLiteralNextStates[state];
+          unsigned posState2 = (position + 1) & p->pbMask;
+          UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2);
+          {
+            unsigned offset = cur + len;
+
+            if (last < offset)
+              last = offset;
+          
+            // do
+            {
+              UInt32 price2;
+              COptimal *opt;
+              len--;
+              // price2 = price + GetPrice_Len_Rep_0(p, len, state2, posState2);
+              price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len);
+
+              opt = &p->opt[offset];
+              // offset--;
+              if (price2 < opt->price)
+              {
+                opt->price = price2;
+                opt->len = (UInt32)len;
+                opt->dist = 0;
+                opt->extra = 1;
+              }
+            }
+            // while (len >= 3);
+          }
+        }
+      }
+    }
+    
+    startLen = 2; /* speed optimization */
+
+    {
+      // ---------- REP ----------
+      unsigned repIndex = 0; // 17.old
+      // unsigned repIndex = IsLitState(state) ? 0 : 1; // 18.notused
+      for (; repIndex < LZMA_NUM_REPS; repIndex++)
+      {
+        unsigned len;
+        UInt32 price;
+        const Byte *data2 = data - reps[repIndex];
+        if (data[0] != data2[0] || data[1] != data2[1])
+          continue;
+        
+        for (len = 2; len < numAvail && data[len] == data2[len]; len++)
+        {}
+        
+        // if (len < startLen) continue; // 18.new: speed optimization
+
+        {
+          unsigned offset = cur + len;
+          if (last < offset)
+            last = offset;
+        }
+        {
+          unsigned len2 = len;
+          price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState);
+          do
+          {
+            UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, len2);
+            COptimal *opt = &p->opt[cur + len2];
+            if (price2 < opt->price)
+            {
+              opt->price = price2;
+              opt->len = (UInt32)len2;
+              opt->dist = (UInt32)repIndex;
+              opt->extra = 0;
+            }
+          }
+          while (--len2 >= 2);
+        }
+        
+        if (repIndex == 0) startLen = len + 1;  // 17.old
+        // startLen = len + 1; // 18.new
+
+        /* if (_maxMode) */
+        {
+          // ---------- REP : LIT : REP_0 ----------
+          // numFastBytes + 1 + numFastBytes
+
+          unsigned len2 = len + 1;
+          unsigned limit = len2 + p->numFastBytes;
+          if (limit > numAvailFull)
+            limit = numAvailFull;
+          
+          len2 += 2;
+          if (len2 <= limit)
+          if (data[len2 - 2] == data2[len2 - 2])
+          if (data[len2 - 1] == data2[len2 - 1])
+          {
+            unsigned state2 = kRepNextStates[state];
+            unsigned posState2 = (position + len) & p->pbMask;
+            price += GET_PRICE_LEN(&p->repLenEnc, posState, len)
+                + GET_PRICE_0(p->isMatch[state2][posState2])
+                + LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]),
+                    data[len], data2[len], p->ProbPrices);
+            
+            // state2 = kLiteralNextStates[state2];
+            state2 = kState_LitAfterRep;
+            posState2 = (posState2 + 1) & p->pbMask;
+
+
+            price += GetPrice_Rep_0(p, state2, posState2);
+
+          for (; len2 < limit && data[len2] == data2[len2]; len2++)
+          {}
+          
+          len2 -= len;
+          // if (len2 >= 3)
+          {
+            {
+              unsigned offset = cur + len + len2;
+
+              if (last < offset)
+                last = offset;
+              // do
+              {
+                UInt32 price2;
+                COptimal *opt;
+                len2--;
+                // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2);
+                price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2);
+
+                opt = &p->opt[offset];
+                // offset--;
+                if (price2 < opt->price)
+                {
+                  opt->price = price2;
+                  opt->len = (UInt32)len2;
+                  opt->extra = (CExtra)(len + 1);
+                  opt->dist = (UInt32)repIndex;
+                }
+              }
+              // while (len2 >= 3);
+            }
+          }
+          }
+        }
+      }
+    }
+
+
+    // ---------- MATCH ----------
+    /* for (unsigned len = 2; len <= newLen; len++) */
+    if (newLen > numAvail)
+    {
+      newLen = numAvail;
+      for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2);
+      matches[numPairs] = (UInt32)newLen;
+      numPairs += 2;
+    }
+    
+    // startLen = 2; /* speed optimization */
+
+    if (newLen >= startLen)
+    {
+      UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]);
+      UInt32 dist;
+      unsigned offs, posSlot, len;
+      
+      {
+        unsigned offset = cur + newLen;
+        if (last < offset)
+          last = offset;
+      }
+
+      offs = 0;
+      while (startLen > matches[offs])
+        offs += 2;
+      dist = matches[(size_t)offs + 1];
+      
+      // if (dist >= kNumFullDistances)
+      GetPosSlot2(dist, posSlot);
+      
+      for (len = /*2*/ startLen; ; len++)
+      {
+        UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
+        {
+          COptimal *opt;
+          unsigned lenNorm = len - 2;
+          lenNorm = GetLenToPosState2(lenNorm);
+          if (dist < kNumFullDistances)
+            price += p->distancesPrices[lenNorm][dist & (kNumFullDistances - 1)];
+          else
+            price += p->posSlotPrices[lenNorm][posSlot] + p->alignPrices[dist & kAlignMask];
+          
+          opt = &p->opt[cur + len];
+          if (price < opt->price)
+          {
+            opt->price = price;
+            opt->len = (UInt32)len;
+            opt->dist = dist + LZMA_NUM_REPS;
+            opt->extra = 0;
+          }
+        }
+
+        if (len == matches[offs])
+        {
+          // if (p->_maxMode) {
+          // MATCH : LIT : REP_0
+
+          const Byte *data2 = data - dist - 1;
+          unsigned len2 = len + 1;
+          unsigned limit = len2 + p->numFastBytes;
+          if (limit > numAvailFull)
+            limit = numAvailFull;
+          
+          len2 += 2;
+          if (len2 <= limit)
+          if (data[len2 - 2] == data2[len2 - 2])
+          if (data[len2 - 1] == data2[len2 - 1])
+          {
+          for (; len2 < limit && data[len2] == data2[len2]; len2++)
+          {}
+          
+          len2 -= len;
+          
+          // if (len2 >= 3)
+          {
+            unsigned state2 = kMatchNextStates[state];
+            unsigned posState2 = (position + len) & p->pbMask;
+            unsigned offset;
+            price += GET_PRICE_0(p->isMatch[state2][posState2]);
+            price += LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]),
+                    data[len], data2[len], p->ProbPrices);
+
+            // state2 = kLiteralNextStates[state2];
+            state2 = kState_LitAfterMatch;
+
+            posState2 = (posState2 + 1) & p->pbMask;
+            price += GetPrice_Rep_0(p, state2, posState2);
+
+            offset = cur + len + len2;
+
+            if (last < offset)
+              last = offset;
+            // do
+            {
+              UInt32 price2;
+              COptimal *opt;
+              len2--;
+              // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2);
+              price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2);
+              opt = &p->opt[offset];
+              // offset--;
+              if (price2 < opt->price)
+              {
+                opt->price = price2;
+                opt->len = (UInt32)len2;
+                opt->extra = (CExtra)(len + 1);
+                opt->dist = dist + LZMA_NUM_REPS;
+              }
+            }
+            // while (len2 >= 3);
+          }
+
+          }
+        
+          offs += 2;
+          if (offs == numPairs)
+            break;
+          dist = matches[(size_t)offs + 1];
+          // if (dist >= kNumFullDistances)
+            GetPosSlot2(dist, posSlot);
+        }
+      }
+    }
+  }
+
+  do
+    p->opt[last].price = kInfinityPrice;
+  while (--last);
+
+  return Backward(p, cur);
+}
+
+
+
+#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))  /* true when (bigDist >> 7) > smallDist, i.e. bigDist is more than 128 times smallDist */
+
+
+
+static unsigned GetOptimumFast(CLzmaEnc *p)  /* Greedy parse step: returns the chosen length and stores the choice in p->backRes. */
+{
+  UInt32 numAvail, mainDist;
+  unsigned mainLen, numPairs, repIndex, repLen, i;
+  const Byte *data;
+  /* Read matches now, or reuse the result cached in p->longestMatchLen / p->numPairs (e.g. by the look-ahead read further down). */
+  if (p->additionalOffset == 0)
+    mainLen = ReadMatchDistances(p, &numPairs);
+  else
+  {
+    mainLen = p->longestMatchLen;
+    numPairs = p->numPairs;
+  }
+
+  numAvail = p->numAvail;
+  p->backRes = MARK_LIT;  /* stays in effect for every "return 1" below: those paths emit a literal */
+  if (numAvail < 2)
+    return 1;
+  // if (mainLen < 2 && p->state == 0) return 1; // 18.06.notused
+  if (numAvail > LZMA_MATCH_LEN_MAX)
+    numAvail = LZMA_MATCH_LEN_MAX;
+  data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+  repLen = repIndex = 0;
+  /* Pass 1: longest match against each of the LZMA_NUM_REPS repeat distances; one that reaches numFastBytes is taken at once. */
+  for (i = 0; i < LZMA_NUM_REPS; i++)
+  {
+    unsigned len;
+    const Byte *data2 = data - p->reps[i];
+    if (data[0] != data2[0] || data[1] != data2[1])
+      continue;
+    for (len = 2; len < numAvail && data[len] == data2[len]; len++)
+    {}
+    if (len >= p->numFastBytes)
+    {
+      p->backRes = (UInt32)i;
+      MOVE_POS(p, len - 1)
+      return len;
+    }
+    if (len > repLen)
+    {
+      repIndex = i;
+      repLen = len;
+    }
+  }
+  /* A main match of at least numFastBytes is also accepted immediately, with the last distance stored in p->matches. */
+  if (mainLen >= p->numFastBytes)
+  {
+    p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS;
+    MOVE_POS(p, mainLen - 1)
+    return mainLen;
+  }
+
+  mainDist = 0; /* for GCC */
+  /* Give up one byte of length for the previous pair while that pair is exactly one shorter and ChangePair says its distance is far smaller. */
+  if (mainLen >= 2)
+  {
+    mainDist = p->matches[(size_t)numPairs - 1];
+    while (numPairs > 2)
+    {
+      UInt32 dist2;
+      if (mainLen != p->matches[(size_t)numPairs - 4] + 1)
+        break;
+      dist2 = p->matches[(size_t)numPairs - 3];
+      if (!ChangePair(dist2, mainDist))
+        break;
+      numPairs -= 2;
+      mainLen--;
+      mainDist = dist2;
+    }
+    if (mainLen == 2 && mainDist >= 0x80)
+      mainLen = 1;  /* drops a 2-byte match at distance >= 0x80: mainLen 1 fails the "mainLen < 2" test below */
+  }
+  /* Prefer the best repeat match when it is at most 1 byte shorter, or 2 / 3 bytes shorter with mainDist >= 2^9 / 2^15. */
+  if (repLen >= 2)
+    if (    repLen + 1 >= mainLen
+        || (repLen + 2 >= mainLen && mainDist >= (1 << 9))
+        || (repLen + 3 >= mainLen && mainDist >= (1 << 15)))
+  {
+    p->backRes = (UInt32)repIndex;
+    MOVE_POS(p, repLen - 1)
+    return repLen;
+  }
+  
+  if (mainLen < 2 || numAvail <= 2)
+    return 1;
+  /* Look-ahead: read matches again (NOTE(review): presumably for the following position - confirm in ReadMatchDistances), cache them, and emit a literal if they look better. */
+  {
+    unsigned len1 = ReadMatchDistances(p, &p->numPairs);
+    p->longestMatchLen = len1;
+  
+    if (len1 >= 2)
+    {
+      UInt32 newDist = p->matches[(size_t)p->numPairs - 1];
+      if (   (len1 >= mainLen && newDist < mainDist)
+          || (len1 == mainLen + 1 && !ChangePair(mainDist, newDist))
+          || (len1 >  mainLen + 1)
+          || (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist)))
+        return 1;
+    }
+  }
+  /* Pass 2: with the re-read data pointer, emit a literal if any repeat distance matches at least mainLen - 1 bytes (minimum 2). */
+  data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+  
+  for (i = 0; i < LZMA_NUM_REPS; i++)
+  {
+    unsigned len, limit;
+    const Byte *data2 = data - p->reps[i];
+    if (data[0] != data2[0] || data[1] != data2[1])
+      continue;
+    limit = mainLen - 1;
+    for (len = 2;; len++)
+    {
+      if (len >= limit)
+        return 1;
+      if (data[len] != data2[len])
+        break;
+    }
+  }
+  /* Nothing better was found: commit to the main match. */
+  p->backRes = mainDist + LZMA_NUM_REPS;
+  if (mainLen != 2)
+  {
+    MOVE_POS(p, mainLen - 2)  /* NOTE(review): one less than on the other paths, presumably because the look-ahead read already advanced one position - confirm */
+  }
+  return mainLen;
+}
+
+
+
+
+static void WriteEndMarker(CLzmaEnc *p, unsigned posState)  /* Codes a normal match with length symbol 0 and a distance whose every coded bit is 1. */
+{
+  UInt32 range;
+  range = p->rc.range;  /* NOTE(review): the RC_* macros appear to work on the locals range / ttt / newBound - confirm in their definitions */
+  {
+    UInt32 ttt, newBound;
+    CLzmaProb *prob = &p->isMatch[p->state][posState];
+    RC_BIT_PRE(&p->rc, prob)
+    RC_BIT_1(&p->rc, prob)  /* isMatch = 1 */
+    prob = &p->isRep[p->state];
+    RC_BIT_PRE(&p->rc, prob)
+    RC_BIT_0(&p->rc, prob)  /* isRep = 0: not a repeat match */
+  }
+  p->state = kMatchNextStates[p->state];
+  /* Length symbol 0. LenEnc_Encode receives &p->rc, so the local range is stored back before the call and reloaded after it. */
+  p->rc.range = range;
+  LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState);
+  range = p->rc.range;
+  /* Position slot from posSlotEncoder[0]: the 1 branch is taken at every tree level. */
+  {
+    // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[0], (1 << kNumPosSlotBits) - 1);
+    CLzmaProb *probs = p->posSlotEncoder[0];
+    unsigned m = 1;
+    do
+    {
+      UInt32 ttt, newBound;
+      RC_BIT_PRE(p, probs + m)
+      RC_BIT_1(&p->rc, probs + m);
+      m = (m << 1) + 1;
+    }
+    while (m < (1 << kNumPosSlotBits));
+  }
+  {
+    // RangeEnc_EncodeDirectBits(&p->rc, ((UInt32)1 << (30 - kNumAlignBits)) - 1, 30 - kNumAlignBits);    UInt32 range = p->range;
+    unsigned numBits = 30 - kNumAlignBits;
+    do
+    {
+      range >>= 1;
+      p->rc.low += range;  /* halving range and adding it to low codes a direct 1 bit; all 30 - kNumAlignBits direct bits are 1 */
+      RC_NORM(&p->rc)
+    }
+    while (--numBits);
+  }
+   
+  {
+    // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask);
+    CLzmaProb *probs = p->posAlignEncoder;
+    unsigned m = 1;
+    do
+    {
+      UInt32 ttt, newBound;
+      RC_BIT_PRE(p, probs + m)
+      RC_BIT_1(&p->rc, probs + m);  /* alignment bits are all 1 as well */
+      m = (m << 1) + 1;
+    }
+    while (m < kAlignTableSize);
+  }
+  p->rc.range = range;  /* publish the updated range back to the range coder */
+}
+
+
+static SRes CheckErrors(CLzmaEnc *p)  /* Sticky status: folds range-coder / match-finder failures into p->result and returns it. */
+{
+  SRes status = p->result;
+  if (status == SZ_OK)
+  {
+    /* The read failure is tested last, so it overrides a write failure; any failure marks the encoder finished. */
+    if (p->rc.res != SZ_OK) status = SZ_ERROR_WRITE;
+    if (p->matchFinderBase.result != SZ_OK) status = SZ_ERROR_READ;
+    if (status != SZ_OK) { p->result = status; p->finished = True; }
+  }
+  return status;
+}
+
+
+MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)  /* Ends the stream: optional end marker, range-coder flush, then the final status. */
+{
+  /* ReleaseMFStream(); */
+  p->finished = True;  /* set unconditionally, before anything is flushed */
+  if (p->writeEndMark)
+    WriteEndMarker(p, nowPos & p->pbMask);  /* posState for the marker is derived from nowPos */
+  RangeEnc_FlushData(&p->rc);
+  RangeEnc_FlushStream(&p->rc);
+  return CheckErrors(p);  /* reports any write / read error recorded so far */
+}
+
+
+MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)  /* Rebuilds p->alignPrices from the p->posAlignEncoder probabilities. */
+{
+  unsigned i;
+  const CProbPrice *ProbPrices = p->ProbPrices;  /* NOTE(review): apparently referenced by name inside the GET_PRICEa* macros - confirm */
+  const CLzmaProb *probs = p->posAlignEncoder;
+  // p->alignPriceCount = 0;
+  for (i = 0; i < kAlignTableSize / 2; i++)  /* each pass prices two entries: i and i + 8 */
+  {
+    UInt32 price = 0;
+    unsigned sym = i;
+    unsigned m = 1;
+    unsigned bit;
+    UInt32 prob;
+    bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;  /* three unrolled tree steps, lowest bit of i first */
+    bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
+    bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
+    prob = probs[m];  /* model of the last coded bit, shared by both entries */
+    p->alignPrices[i    ] = price + GET_PRICEa_0(prob);
+    p->alignPrices[i + 8] = price + GET_PRICEa_1(prob);
+    // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
+  }
+}
+
+
+MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)  /* Rebuilds p->posSlotPrices and p->distancesPrices; resets p->matchPriceCount to 0. */
+{
+  // int y; for (y = 0; y < 100; y++) {
+
+  UInt32 tempPrices[kNumFullDistances];
+  unsigned i, lps;
+
+  const CProbPrice *ProbPrices = p->ProbPrices;  /* NOTE(review): apparently referenced by name inside the GET_PRICEa* macros - confirm */
+  p->matchPriceCount = 0;
+  /* Step 1: footer-bit prices from p->posEncoders go into tempPrices; each pass fills two entries that differ only in the last coded bit. */
+  for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++)
+  {
+    unsigned posSlot = GetPosSlot1(i);
+    unsigned footerBits = (posSlot >> 1) - 1;
+    unsigned base = ((2 | (posSlot & 1)) << footerBits);
+    const CLzmaProb *probs = p->posEncoders + (size_t)base * 2;
+    // tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices);
+    UInt32 price = 0;
+    unsigned m = 1;
+    unsigned sym = i;
+    unsigned offset = (unsigned)1 << footerBits;
+    base += i;  /* from here on base is the tempPrices index of the entry whose last bit is 0 */
+    
+    if (footerBits)
+    do
+    {
+      unsigned bit = sym & 1;
+      sym >>= 1;
+      price += GET_PRICEa(probs[m], bit);
+      m = (m << 1) + bit;
+    }
+    while (--footerBits);
+
+    {
+      unsigned prob = probs[m];
+      tempPrices[base         ] = price + GET_PRICEa_0(prob);
+      tempPrices[base + offset] = price + GET_PRICEa_1(prob);
+    }
+  }
+  /* Step 2: for every length-to-position state, price the position slots and combine them with tempPrices. */
+  for (lps = 0; lps < kNumLenToPosStates; lps++)
+  {
+    unsigned slot;
+    unsigned distTableSize2 = (p->distTableSize + 1) >> 1;
+    UInt32 *posSlotPrices = p->posSlotPrices[lps];
+    const CLzmaProb *probs = p->posSlotEncoder[lps];
+    /* Two slot prices per pass (slot * 2 and slot * 2 + 1): five shared tree steps, then the last bit priced both ways. */
+    for (slot = 0; slot < distTableSize2; slot++)
+    {
+      // posSlotPrices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices);
+      UInt32 price;
+      unsigned bit;
+      unsigned sym = slot + (1 << (kNumPosSlotBits - 1));
+      unsigned prob;
+      bit = sym & 1; sym >>= 1; price  = GET_PRICEa(probs[sym], bit);
+      bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+      bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+      bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+      bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+      prob = probs[(size_t)slot + (1 << (kNumPosSlotBits - 1))];
+      posSlotPrices[(size_t)slot * 2    ] = price + GET_PRICEa_0(prob);
+      posSlotPrices[(size_t)slot * 2 + 1] = price + GET_PRICEa_1(prob);
+    }
+    /* Slot pairs from kEndPosModelIndex / 2 up get a surcharge that grows by (1 << kNumBitPriceShiftBits) per pair; NOTE(review): presumably the cost of their direct bits - confirm. */
+    {
+      UInt32 delta = ((UInt32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
+      for (slot = kEndPosModelIndex / 2; slot < distTableSize2; slot++)
+      {
+        posSlotPrices[(size_t)slot * 2    ] += delta;
+        posSlotPrices[(size_t)slot * 2 + 1] += delta;
+        delta += ((UInt32)1 << kNumBitPriceShiftBits);
+      }
+    }
+    /* Full-distance table: entries 0..3 are the bare slot prices; larger entries add the footer price from step 1. */
+    {
+      UInt32 *dp = p->distancesPrices[lps];
+      
+      dp[0] = posSlotPrices[0];
+      dp[1] = posSlotPrices[1];
+      dp[2] = posSlotPrices[2];
+      dp[3] = posSlotPrices[3];
+
+      for (i = 4; i < kNumFullDistances; i += 2)
+      {
+        UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)];
+        dp[i    ] = slotPrice + tempPrices[i];
+        dp[i + 1] = slotPrice + tempPrices[i + 1];
+      }
+    }
+  }
+  // }
+}
+
+
+
+void LzmaEnc_Construct(CLzmaEnc *p)  /* Constructs the sub-objects, applies default properties and builds the lookup tables. */
+{
+  RangeEnc_Construct(&p->rc);
+  MatchFinder_Construct(&p->matchFinderBase);
+  
+  #ifndef _7ZIP_ST
+  MatchFinderMt_Construct(&p->matchFinderMt);
+  p->matchFinderMt.MatchFinder = &p->matchFinderBase;  /* the multi-threaded finder points at the base finder */
+  #endif
+  /* Start from whatever defaults LzmaEncProps_Init produces; the result of LzmaEnc_SetProps is not checked here. */
+  {
+    CLzmaEncProps props;
+    LzmaEncProps_Init(&props);
+    LzmaEnc_SetProps(p, &props);
+  }
+
+  #ifndef LZMA_LOG_BSR
+  LzmaEnc_FastPosInit(p->g_FastPos);
+  #endif
+  /* Literal tables start out unallocated (LzmaEnc_Alloc creates them on demand). */
+  LzmaEnc_InitPriceTables(p->ProbPrices);
+  p->litProbs = NULL;
+  p->saveState.litProbs = NULL;
+
+}
+
+CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)  /* Allocates a CLzmaEnc with alloc and constructs it; a failed allocation is returned as-is. */
+{
+  void *mem = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc));
+  if (mem == NULL)
+    return mem;
+  LzmaEnc_Construct((CLzmaEnc *)mem);
+  return mem;
+}
+
+void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)  /* Frees both literal-probability tables and clears their pointers. */
+{
+  ISzAlloc_Free(alloc, p->litProbs);
+  p->litProbs = NULL;
+  ISzAlloc_Free(alloc, p->saveState.litProbs);
+  p->saveState.litProbs = NULL;
+}
+
+void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)  /* Frees the match finders, literal tables and range coder; *p itself is not freed here. */
+{
+  #ifndef _7ZIP_ST
+  MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
+  #endif
+  /* The match finders are released with allocBig; the literal tables and the range coder with alloc. */
+  MatchFinder_Free(&p->matchFinderBase, allocBig);
+  LzmaEnc_FreeLits(p, alloc);
+  RangeEnc_Free(&p->rc, alloc);
+}
+
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)  /* Destructs the encoder, then frees the object itself with alloc. */
+{
+  LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
+  ISzAlloc_Free(alloc, p);  /* counterpart of the ISzAlloc_Alloc call in LzmaEnc_Create */
+}
+
+
+static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)  /* Encodes symbols until the input ends or a block limit is reached; returns the encoder status. */
+{
+  UInt32 nowPos32, startPos32;
+  if (p->needInit)
+  {
+    p->matchFinder.Init(p->matchFinderObj);
+    p->needInit = 0;
+  }
+  /* A finished encoder only repeats its stored result; otherwise stop on any error recorded so far. */
+  if (p->finished)
+    return p->result;
+  RINOK(CheckErrors(p));
+
+  nowPos32 = (UInt32)p->nowPos64;
+  startPos32 = nowPos32;
+  /* First byte of the stream: coded as a plain literal in kState_Start; empty input is flushed immediately. */
+  if (p->nowPos64 == 0)
+  {
+    unsigned numPairs;
+    Byte curByte;
+    if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
+      return Flush(p, nowPos32);
+    ReadMatchDistances(p, &numPairs);
+    RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]);
+    // p->state = kLiteralNextStates[p->state];
+    curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset);
+    LitEnc_Encode(&p->rc, p->litProbs, curByte);
+    p->additionalOffset--;
+    nowPos32++;
+  }
+  /* Main loop, entered only when input remains; each pass codes one literal, repeat match or normal match. */
+  if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0)
+  
+  for (;;)
+  {
+    UInt32 dist;
+    unsigned len, posState;
+    UInt32 range, ttt, newBound;
+    CLzmaProb *probs;
+    /* Choose the next symbol: fast heuristic, a new optimal parse, or the next queued entry of p->opt. */
+    if (p->fastMode)
+      len = GetOptimumFast(p);
+    else
+    {
+      unsigned oci = p->optCur;
+      if (p->optEnd == oci)
+        len = GetOptimum(p, nowPos32);
+      else
+      {
+        const COptimal *opt = &p->opt[oci];
+        len = opt->len;
+        p->backRes = opt->dist;
+        p->optCur = oci + 1;
+      }
+    }
+    /* Begin coding the isMatch bit; its value depends on p->backRes, read just below. */
+    posState = (unsigned)nowPos32 & p->pbMask;
+    range = p->rc.range;
+    probs = &p->isMatch[p->state][posState];
+    
+    RC_BIT_PRE(&p->rc, probs)
+    
+    dist = p->backRes;
+
+    #ifdef SHOW_STAT2
+    printf("\n pos = %6X, len = %3u  pos = %6u", nowPos32, len, dist);
+    #endif
+
+    if (dist == MARK_LIT)  /* literal */
+    {
+      Byte curByte;
+      const Byte *data;
+      unsigned state;
+
+      RC_BIT_0(&p->rc, probs);
+      p->rc.range = range;
+      data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+      probs = LIT_PROBS(nowPos32, *(data - 1));
+      curByte = *data;
+      state = p->state;
+      p->state = kLiteralNextStates[state];
+      if (IsLitState(state))
+        LitEnc_Encode(&p->rc, probs, curByte);
+      else
+        LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0]));  /* otherwise coded against the byte at distance reps[0] */
+    }
+    else
+    {
+      RC_BIT_1(&p->rc, probs);
+      probs = &p->isRep[p->state];
+      RC_BIT_PRE(&p->rc, probs)
+      
+      if (dist < LZMA_NUM_REPS)  /* repeat match: dist is an index into p->reps */
+      {
+        RC_BIT_1(&p->rc, probs);
+        probs = &p->isRepG0[p->state];
+        RC_BIT_PRE(&p->rc, probs)
+        if (dist == 0)
+        {
+          RC_BIT_0(&p->rc, probs);
+          probs = &p->isRep0Long[p->state][posState];
+          RC_BIT_PRE(&p->rc, probs)
+          if (len != 1)
+          {
+            RC_BIT_1_BASE(&p->rc, probs);
+          }
+          else
+          {
+            RC_BIT_0_BASE(&p->rc, probs);
+            p->state = kShortRepNextStates[p->state];  /* len 1 with reps[0]: no length is coded afterwards */
+          }
+        }
+        else  /* reps[1..3]: the selected distance is moved to the front of p->reps */
+        {
+          RC_BIT_1(&p->rc, probs);
+          probs = &p->isRepG1[p->state];
+          RC_BIT_PRE(&p->rc, probs)
+          if (dist == 1)
+          {
+            RC_BIT_0_BASE(&p->rc, probs);
+            dist = p->reps[1];
+          }
+          else
+          {
+            RC_BIT_1(&p->rc, probs);
+            probs = &p->isRepG2[p->state];
+            RC_BIT_PRE(&p->rc, probs)
+            if (dist == 2)
+            {
+              RC_BIT_0_BASE(&p->rc, probs);
+              dist = p->reps[2];
+            }
+            else
+            {
+              RC_BIT_1_BASE(&p->rc, probs);
+              dist = p->reps[3];
+              p->reps[3] = p->reps[2];
+            }
+            p->reps[2] = p->reps[1];
+          }
+          p->reps[1] = p->reps[0];
+          p->reps[0] = dist;
+        }
+
+        RC_NORM(&p->rc)
+
+        p->rc.range = range;
+
+        if (len != 1)
+        {
+          LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);
+          --p->repLenEncCounter;
+          p->state = kRepNextStates[p->state];
+        }
+      }
+      else  /* normal match: backRes - LZMA_NUM_REPS is the distance value to code */
+      {
+        unsigned posSlot;
+        RC_BIT_0(&p->rc, probs);
+        p->rc.range = range;
+        p->state = kMatchNextStates[p->state];
+
+        LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState);
+        // --p->lenEnc.counter;
+
+        dist -= LZMA_NUM_REPS;
+        p->reps[3] = p->reps[2];
+        p->reps[2] = p->reps[1];
+        p->reps[1] = p->reps[0];
+        p->reps[0] = dist + 1;  /* p->reps stores the coded distance value plus 1 */
+        
+        p->matchPriceCount++;
+        GetPosSlot(dist, posSlot);
+        // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot);
+        {
+          UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits);
+          range = p->rc.range;
+          probs = p->posSlotEncoder[GetLenToPosState(len)];
+          do
+          {
+            CLzmaProb *prob = probs + (sym >> kNumPosSlotBits);
+            UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1;
+            sym <<= 1;
+            RC_BIT(&p->rc, prob, bit);
+          }
+          while (sym < (1 << kNumPosSlotBits * 2));
+          p->rc.range = range;
+        }
+        /* Distance values from kStartPosModelIndex up carry extra footer bits after the slot. */
+        if (dist >= kStartPosModelIndex)
+        {
+          unsigned footerBits = ((posSlot >> 1) - 1);
+
+          if (dist < kNumFullDistances)
+          {
+            unsigned base = ((2 | (posSlot & 1)) << footerBits);
+            RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, (unsigned)(dist /* - base */));
+          }
+          else
+          {
+            UInt32 pos2 = (dist | 0xF) << (32 - footerBits);  /* footer bits moved to the top; the low four are forced to 1 as a stop sentinel */
+            range = p->rc.range;
+            // RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
+            /*
+            do
+            {
+              range >>= 1;
+              p->rc.low += range & (0 - ((dist >> --footerBits) & 1));
+              RC_NORM(&p->rc)
+            }
+            while (footerBits > kNumAlignBits);
+            */
+            do
+            {
+              range >>= 1;
+              p->rc.low += range & (0 - (pos2 >> 31));  /* adds range only when the current top bit is 1 */
+              pos2 += pos2;
+              RC_NORM(&p->rc)
+            }
+            while (pos2 != 0xF0000000);  /* stop once only the four sentinel bits remain */
+
+
+            // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask);
+
+            {
+              unsigned m = 1;
+              unsigned bit;
+              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit;
+              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit;
+              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit;
+              bit = dist & 1;             RC_BIT(&p->rc, p->posAlignEncoder + m, bit);
+              p->rc.range = range;
+              // p->alignPriceCount++;
+            }
+          }
+        }
+      }
+    }
+    /* Advance by the coded length; the housekeeping below runs only when additionalOffset has dropped to 0. */
+    nowPos32 += (UInt32)len;
+    p->additionalOffset -= len;
+    
+    if (p->additionalOffset == 0)
+    {
+      UInt32 processed;
+
+      if (!p->fastMode)
+      {
+        /*
+        if (p->alignPriceCount >= 16) // kAlignTableSize
+          FillAlignPrices(p);
+        if (p->matchPriceCount >= 128)
+          FillDistancesPrices(p);
+        if (p->lenEnc.counter <= 0)
+          LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
+        */
+        if (p->matchPriceCount >= 64)  /* after 64 normal matches, refresh the align, distance and length price tables */
+        {
+          FillAlignPrices(p);
+          // { int y; for (y = 0; y < 100; y++) {
+          FillDistancesPrices(p);
+          // }}
+          LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
+        }
+        if (p->repLenEncCounter <= 0)
+        {
+          p->repLenEncCounter = REP_LEN_COUNT;
+          LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
+        }
+      }
+    
+      if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
+        break;  /* input exhausted: fall through to the Flush at the bottom */
+      processed = nowPos32 - startPos32;
+      
+      if (maxPackSize)
+      {
+        if (processed + kNumOpts + 300 >= maxUnpackSize
+            || RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize)
+          break;  /* close to a caller-supplied limit: this path also ends in Flush */
+      }
+      else if (processed >= (1 << 17))
+      {
+        p->nowPos64 += nowPos32 - startPos32;
+        return CheckErrors(p);  /* no limits given: return after 2^17 positions without flushing */
+      }
+    }
+  }
+
+  p->nowPos64 += nowPos32 - startPos32;
+  return Flush(p, nowPos32);
+}
+
+
+
+#define kBigHashDicLimit ((UInt32)1 << 24)  /* dictSize above this (1 << 24) makes LzmaEnc_Alloc set matchFinderBase.bigHash */
+
+static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+  /* Allocates range-coder, literal-table and match-finder storage; returns SZ_OK on success. */
+  UInt32 keepBefore = kNumOpts;
+  const unsigned litBits = p->lc + p->lp;
+
+  if (!RangeEnc_Alloc(&p->rc, alloc))
+    return SZ_ERROR_MEM;
+
+  #ifndef _7ZIP_ST
+  p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0));
+  #endif
+
+  /* Both literal tables are (re)allocated when either is missing or lc + lp changed. */
+  if (!p->litProbs || !p->saveState.litProbs || p->lclp != litBits)
+  {
+    const size_t litBytes = ((UInt32)0x300 << litBits) * sizeof(CLzmaProb);
+    LzmaEnc_FreeLits(p, alloc);
+    p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, litBytes);
+    p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, litBytes);
+    if (!p->litProbs || !p->saveState.litProbs)
+    {
+      LzmaEnc_FreeLits(p, alloc);
+      return SZ_ERROR_MEM;
+    }
+    p->lclp = litBits;
+  }
+
+  p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
+
+  /* Raise keepBefore so that keepBefore + dictSize is at least keepWindowSize. */
+  if (keepBefore + p->dictSize < keepWindowSize)
+    keepBefore = keepWindowSize - p->dictSize;
+
+  #ifndef _7ZIP_ST
+  if (p->mtMode)
+  {
+    RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, keepBefore, p->numFastBytes,
+        LZMA_MATCH_LEN_MAX + 1 /* 18.04 */, allocBig));
+    p->matchFinderObj = &p->matchFinderMt;
+    p->matchFinderBase.bigHash = (Byte)(
+        (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0);
+    MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
+  }
+  else
+  #endif
+  {
+    if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, keepBefore, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
+      return SZ_ERROR_MEM;
+    p->matchFinderObj = &p->matchFinderBase;
+    MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
+  }
+  return SZ_OK;
+}
+
+/* Resets the encoder's adaptive state to start-of-stream values: coder state,
+   rep distances, range coder, every probability model, the opt[] slots
+   and the pb / lp masks. Writes through p->litProbs, so it must be allocated. */
+void LzmaEnc_Init(CLzmaEnc *p)
+{
+  unsigned n;
+
+  p->state = 0;
+  /* all four rep distances start at 1 */
+  for (n = 0; n < 4; n++)
+    p->reps[n] = 1;
+
+  RangeEnc_Init(&p->rc);
+
+  /* alignment-bit model */
+  for (n = 0; n < (1 << kNumAlignBits); n++)
+    p->posAlignEncoder[n] = kProbInitValue;
+
+  /* per-state match / rep decision models */
+  for (n = 0; n < kNumStates; n++)
+  {
+    unsigned posState;
+    p->isRep[n] = kProbInitValue;
+    p->isRepG0[n] = kProbInitValue;
+    p->isRepG1[n] = kProbInitValue;
+    p->isRepG2[n] = kProbInitValue;
+    for (posState = 0; posState < LZMA_NUM_PB_STATES_MAX; posState++)
+    {
+      p->isMatch[n][posState] = kProbInitValue;
+      p->isRep0Long[n][posState] = kProbInitValue;
+    }
+  }
+
+  /* distance models: slot coders first, then the posEncoders table */
+  for (n = 0; n < kNumLenToPosStates; n++)
+  {
+    unsigned slot;
+    for (slot = 0; slot < (1 << kNumPosSlotBits); slot++)
+      p->posSlotEncoder[n][slot] = kProbInitValue;
+  }
+
+  for (n = 0; n < kNumFullDistances; n++)
+    p->posEncoders[n] = kProbInitValue;
+
+  /* literal coder: 0x300 << (lc + lp) probabilities */
+  {
+    const UInt32 litCount = (UInt32)0x300 << (p->lp + p->lc);
+    CLzmaProb *lit = p->litProbs;
+    UInt32 idx;
+    for (idx = 0; idx < litCount; idx++)
+      lit[idx] = kProbInitValue;
+  }
+
+  LenEnc_Init(&p->lenProbs);
+  LenEnc_Init(&p->repLenProbs);
+
+  /* opt[] bookkeeping */
+  p->optEnd = 0;
+  p->optCur = 0;
+  p->additionalOffset = 0;
+  for (n = 0; n < kNumOpts; n++)
+    p->opt[n].price = kInfinityPrice;
+
+  /* masks derived from pb and from lp / lc */
+  p->pbMask = (1 << p->pb) - 1;
+  p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
+}
+
+
+/* Refreshes the price tables; distance and alignment prices are skipped in fast mode. */
+void LzmaEnc_InitPrices(CLzmaEnc *p)
+{
+  if (!p->fastMode)
+  {
+    FillDistancesPrices(p);
+    FillAlignPrices(p);
+  }
+
+  /* both length price coders get the same table size */
+  p->repLenEnc.tableSize = p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
+  p->lenEnc.tableSize = p->repLenEnc.tableSize;
+  p->repLenEncCounter = REP_LEN_COUNT;
+
+  LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
+  LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
+}
+
+/* Derives distTableSize from dictSize, clears finished/result, then allocates and initializes the encoder. */
+static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+  unsigned dicLog = kEndPosModelIndex / 2;
+  while (dicLog < kDicLogSizeMax && p->dictSize > ((UInt32)1 << dicLog))
+    dicLog++; /* smallest log2 size that covers dictSize, capped at kDicLogSizeMax */
+  p->distTableSize = dicLog * 2;
+
+  p->finished = False;
+  p->result = SZ_OK;
+  RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig));
+  LzmaEnc_Init(p);
+  LzmaEnc_InitPrices(p);
+  p->nowPos64 = 0;
+  return SZ_OK;
+}
+
+static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream,
+    ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{ /* Binds both streams to the encoder, flags it for init and allocates/initializes with keepWindowSize 0. */
+  CLzmaEnc *enc = (CLzmaEnc *)pp;
+  enc->rc.outStream = outStream;
+  enc->matchFinderBase.stream = inStream;
+  enc->needInit = 1;
+  return LzmaEnc_AllocAndInit(enc, 0, alloc, allocBig);
+}
+
+/* Stream-input preparation with a caller-chosen keepWindowSize; the output stream is left untouched. */
+SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize,
+    ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+  CLzmaEnc *enc = (CLzmaEnc *)pp;
+  enc->needInit = 1;
+  enc->matchFinderBase.stream = inStream;
+  return LzmaEnc_AllocAndInit(enc, keepWindowSize, alloc, allocBig);
+}
+
+static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
+{ /* Switches the match finder to direct input and hands it the caller's buffer and its length. */
+  p->matchFinderBase.bufferBase = (Byte *)src;
+  p->matchFinderBase.directInputRem = srcLen;
+  p->matchFinderBase.directInput = 1;
+}
+
+/* Prepares the encoder to compress the memory block src[0 .. srcLen). */
+SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
+    UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+  CLzmaEnc *enc = (CLzmaEnc *)pp;
+  enc->needInit = 1;
+  LzmaEnc_SetInputBuf(enc, src, srcLen);
+  LzmaEnc_SetDataSize(pp, srcLen); /* the whole buffer is the expected data size */
+  return LzmaEnc_AllocAndInit(enc, keepWindowSize, alloc, allocBig);
+}
+
+void LzmaEnc_Finish(CLzmaEncHandle pp)
+{ /* End-of-encoding hook (LzmaEnc_Encode2 calls it after its loop): in multithreaded builds it releases the MT match finder's stream when mtMode is set; otherwise it does nothing. */
+  #ifndef _7ZIP_ST
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  if (p->mtMode)
+    MatchFinderMt_ReleaseStream(&p->matchFinderMt);
+  #else
+  UNUSED_VAR(pp); /* presumably only marks the parameter as intentionally unused */
+  #endif
+}
+
+
+typedef struct
+{ /* ISeqOutStream implementation that writes into a caller-supplied, fixed-size memory buffer */
+  ISeqOutStream vt; /* embedded interface; SeqOutStreamBuf_Write recovers this struct from it via CONTAINER_FROM_VTBL */
+  Byte *data; /* next write position */
+  SizeT rem; /* bytes still free starting at data */
+  BoolInt overflow; /* set to True once a write had to be truncated */
+} CLzmaEnc_SeqOutStreamBuf;
+
+/* ISeqOutStream::Write callback that appends to a fixed-size memory buffer: stores at most
+   p->rem bytes, sets p->overflow when the input was truncated, returns the bytes stored. */
+static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size)
+{
+  CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt);
+  if (p->rem < size) { size = p->rem; p->overflow = True; }
+  if (size == 0)
+    return 0; /* memcpy and pointer arithmetic on a NULL or exhausted buffer are undefined even for length 0 */
+  memcpy(p->data, data, size);
+  p->rem -= size;
+  p->data += size;
+  return size;
+}
+
+
+UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
+{ /* Forwards to the active match finder's GetNumAvailableBytes through the encoder's vtable. */
+  const CLzmaEnc *enc = (const CLzmaEnc *)pp;
+  return enc->matchFinder.GetNumAvailableBytes(enc->matchFinderObj);
+}
+
+
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
+{ /* Match finder's current-position pointer, moved back by the encoder's additionalOffset. */
+  const CLzmaEnc *enc = (const CLzmaEnc *)pp;
+  return enc->matchFinder.GetPointerToCurrentPos(enc->matchFinderObj) - enc->additionalOffset;
+}
+
+
+/* Encodes one block into dest. On entry *destLen is the capacity of dest and
+   *unpackSize is forwarded to LzmaEnc_CodeOneBlock; on return they hold the
+   bytes written and the bytes consumed. The end marker is switched off. */
+SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
+    Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
+{
+  CLzmaEnc *enc = (CLzmaEnc *)pp;
+  CLzmaEnc_SeqOutStreamBuf sink;
+  UInt64 startPos;
+  SRes status;
+
+  sink.vt.Write = SeqOutStreamBuf_Write;
+  sink.data = dest;
+  sink.rem = *destLen;
+  sink.overflow = False;
+
+  enc->writeEndMark = False;
+  enc->finished = False;
+  enc->result = SZ_OK;
+
+  if (reInit)
+    LzmaEnc_Init(enc);
+  LzmaEnc_InitPrices(enc);
+
+  startPos = enc->nowPos64;
+  RangeEnc_Init(&enc->rc);
+  enc->rc.outStream = &sink.vt;
+
+  if (desiredPackSize == 0)
+    return SZ_ERROR_OUTPUT_EOF; /* *destLen and *unpackSize are not updated on this path */
+
+  status = LzmaEnc_CodeOneBlock(enc, desiredPackSize, *unpackSize);
+
+  *unpackSize = (UInt32)(enc->nowPos64 - startPos);
+  *destLen -= sink.rem;
+  return sink.overflow ? SZ_ERROR_OUTPUT_EOF : status;
+}
+
+
+static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
+{ /* Main encode loop: codes blocks until the encoder reports finished or an error occurs, reporting progress after each block, then calls LzmaEnc_Finish. Returns the first non-OK result, or SZ_ERROR_PROGRESS when the progress callback fails. */
+  SRes res = SZ_OK;
+
+  #ifndef _7ZIP_ST
+  Byte allocaDummy[0x300]; /* NOTE(review): purpose is not evident from this file; the array is written but its contents are never used */
+  allocaDummy[0] = 0;
+  allocaDummy[1] = allocaDummy[0];
+  #endif
+
+  for (;;)
+  {
+    res = LzmaEnc_CodeOneBlock(p, 0, 0);
+    if (res != SZ_OK || p->finished)
+      break;
+    if (progress)
+    {
+      res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));
+      if (res != SZ_OK)
+      {
+        res = SZ_ERROR_PROGRESS; /* any callback failure is reported as SZ_ERROR_PROGRESS */
+        break;
+      }
+    }
+  }
+  
+  LzmaEnc_Finish(p); /* runs on success and on error alike */
+
+  /*
+  if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase))
+    res = SZ_ERROR_FAIL;
+  }
+  */
+
+  return res;
+}
+
+
+SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress,
+    ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{ /* Stream-to-stream encoding: prepare the encoder, then run the block loop; a failed prepare is returned as is. */
+  SRes res = LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig);
+  return (res != SZ_OK) ? res : LzmaEnc_Encode2((CLzmaEnc *)pp, progress);
+}
+
+
+/* Writes LZMA_PROPS_SIZE bytes: the packed lc/lp/pb byte, then the rounded-up dictionary size, little-endian. */
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
+{
+  const CLzmaEnc *enc = (const CLzmaEnc *)pp;
+  const UInt32 mbMask = ((UInt32)1 << 20) - 1;
+  UInt32 dict = enc->dictSize;
+  unsigned k;
+  if (*size < LZMA_PROPS_SIZE)
+    return SZ_ERROR_PARAM;
+  *size = LZMA_PROPS_SIZE;
+  props[0] = (Byte)((enc->pb * 5 + enc->lp) * 9 + enc->lc);
+
+  if (dict < ((UInt32)1 << 22))
+  {
+    for (k = 11; k <= 30; k++) /* below 4 MiB: round up to the next 2 << k or 3 << k */
+    {
+      if (dict <= ((UInt32)2 << k)) { dict = ((UInt32)2 << k); break; }
+      if (dict <= ((UInt32)3 << k)) { dict = ((UInt32)3 << k); break; }
+    }
+  }
+  else if (dict < (UInt32)0xFFFFFFFF - mbMask)
+    dict = (dict + mbMask) & ~mbMask; /* otherwise round up to a 1 MiB multiple unless that would overflow */
+  for (k = 0; k < 4; k++)
+    props[1 + k] = (Byte)(dict >> (8 * k));
+  return SZ_OK;
+}
+
+
+unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp)
+{ /* Reports the encoder's current writeEndMark setting. */
+  return ((const CLzmaEnc *)pp)->writeEndMark;
+}
+
+
+/* Compresses src[0 .. srcLen) into dest. *destLen is the capacity of dest on
+   entry and the number of bytes produced on return. Returns
+   SZ_ERROR_OUTPUT_EOF if dest was too small and SZ_ERROR_FAIL if encoding
+   succeeded without consuming exactly srcLen bytes. */
+SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+  CLzmaEnc *enc = (CLzmaEnc *)pp;
+  CLzmaEnc_SeqOutStreamBuf sink;
+  SRes status;
+
+  /* memory sink that receives the range coder's output */
+  sink.vt.Write = SeqOutStreamBuf_Write;
+  sink.data = dest;
+  sink.rem = *destLen;
+  sink.overflow = False;
+
+  enc->writeEndMark = writeEndMark;
+  enc->rc.outStream = &sink.vt;
+
+  status = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig);
+  if (status == SZ_OK)
+    status = LzmaEnc_Encode2(enc, progress);
+  /* a clean run must have consumed the whole input */
+  if (status == SZ_OK && enc->nowPos64 != srcLen)
+    status = SZ_ERROR_FAIL;
+
+  *destLen -= sink.rem;
+  return sink.overflow ? SZ_ERROR_OUTPUT_EOF : status;
+}
+
+
+/* One-call interface: creates a temporary encoder, applies props, writes the encoded properties
+   and compresses src into dest. The encoder is destroyed before returning, whatever the outcome. */
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+    ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+  SRes status;
+  CLzmaEnc *enc = (CLzmaEnc *)LzmaEnc_Create(alloc);
+  if (!enc)
+    return SZ_ERROR_MEM;
+
+  status = LzmaEnc_SetProps(enc, props);
+  if (status == SZ_OK)
+    status = LzmaEnc_WriteProperties(enc, propsEncoded, propsSize);
+  if (status == SZ_OK)
+    status = LzmaEnc_MemEncode(enc, dest, destLen, src, srcLen,
+        writeEndMark, progress, alloc, allocBig);
+
+  LzmaEnc_Destroy(enc, alloc, allocBig);
+  return status;
+}
diff --git a/libraries/lzma/C/LzmaEnc.h b/libraries/lzma/C/LzmaEnc.h
new file mode 100644
index 000000000..9194ee576
--- /dev/null
+++ b/libraries/lzma/C/LzmaEnc.h
@@ -0,0 +1,76 @@
+/*  LzmaEnc.h -- LZMA Encoder
+2017-07-27 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA_ENC_H
+#define __LZMA_ENC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct _CLzmaEncProps
+{ /* encoder settings handed to LzmaEnc_SetProps and LzmaEncode */
+  int level;       /* 0 <= level <= 9 */
+  UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
+                      (1 << 12) <= dictSize <= (3 << 29) for 64-bit version
+                      default = (1 << 24) */
+  int lc;          /* 0 <= lc <= 8, default = 3 */
+  int lp;          /* 0 <= lp <= 4, default = 0 */
+  int pb;          /* 0 <= pb <= 4, default = 2 */
+  int algo;        /* 0 - fast, 1 - normal, default = 1 */
+  int fb;          /* 5 <= fb <= 273, default = 32 */
+  int btMode;      /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
+  int numHashBytes; /* 2, 3 or 4, default = 4 */
+  UInt32 mc;       /* 1 <= mc <= (1 << 30), default = 32 */
+  unsigned writeEndMark;  /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
+  int numThreads;  /* 1 or 2, default = 2 */
+
+  UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
+                        Encoder uses this value to reduce dictionary size */
+} CLzmaEncProps;
+
+void LzmaEncProps_Init(CLzmaEncProps *p);
+void LzmaEncProps_Normalize(CLzmaEncProps *p);
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
+
+
+/* ---------- CLzmaEncHandle Interface ---------- */
+
+/* LzmaEnc* functions can return the following exit codes:
+SRes:
+  SZ_OK           - OK
+  SZ_ERROR_MEM    - Memory allocation error
+  SZ_ERROR_PARAM  - Incorrect parameter in props
+  SZ_ERROR_WRITE  - ISeqOutStream write callback error
+  SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
+  SZ_ERROR_PROGRESS - some break from progress callback
+  SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
+*/
+
+typedef void * CLzmaEncHandle;
+
+CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
+void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
+unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
+
+SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
+    ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+
+/* ---------- One Call Interface ---------- */
+
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+    ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/C/Ppmd.h b/libraries/lzma/C/Ppmd.h
new file mode 100644
index 000000000..a5c1e3ef2
--- /dev/null
+++ b/libraries/lzma/C/Ppmd.h
@@ -0,0 +1,85 @@
+/* Ppmd.h -- PPMD codec common code
+2017-04-03 : Igor Pavlov : Public domain
+This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
+#ifndef __PPMD_H
+#define __PPMD_H
+
+#include "CpuArch.h"
+
+EXTERN_C_BEGIN
+
+#ifdef MY_CPU_32BIT
+  #define PPMD_32BIT
+#endif
+
+#define PPMD_INT_BITS 7
+#define PPMD_PERIOD_BITS 7
+#define PPMD_BIN_SCALE (1 << (PPMD_INT_BITS + PPMD_PERIOD_BITS))
+
+#define PPMD_GET_MEAN_SPEC(summ, shift, round) (((summ) + (1 << ((shift) - (round)))) >> (shift))
+#define PPMD_GET_MEAN(summ) PPMD_GET_MEAN_SPEC((summ), PPMD_PERIOD_BITS, 2)
+#define PPMD_UPDATE_PROB_0(prob) ((prob) + (1 << PPMD_INT_BITS) - PPMD_GET_MEAN(prob))
+#define PPMD_UPDATE_PROB_1(prob) ((prob) - PPMD_GET_MEAN(prob))
+
+#define PPMD_N1 4
+#define PPMD_N2 4
+#define PPMD_N3 4
+#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4)
+#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4)
+
+#pragma pack(push, 1)
+/* Most compilers work OK here even without #pragma pack(push, 1), but some GCC compilers need it. */
+
+/* SEE-contexts for PPM-contexts with masked symbols */
+typedef struct
+{
+  UInt16 Summ; /* Freq */
+  Byte Shift;  /* Speed of Freq change; low Shift is for fast change */
+  Byte Count;  /* Count to next change of Shift */
+} CPpmd_See;
+
+#define Ppmd_See_Update(p)  if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \
+    { (p)->Summ <<= 1; (p)->Count = (Byte)(3 << (p)->Shift++); }
+
+typedef struct
+{ /* one symbol entry of a context; laid out under the pack(1) pragma above */
+  Byte Symbol; /* the byte value this entry stands for */
+  Byte Freq; /* frequency counter of the symbol */
+  UInt16 SuccessorLow; /* low 16 bits of the 32-bit successor reference */
+  UInt16 SuccessorHigh; /* high 16 bits of the successor reference */
+} CPpmd_State;
+
+#pragma pack(pop)
+
+typedef
+  #ifdef PPMD_32BIT
+    CPpmd_State *
+  #else
+    UInt32
+  #endif
+  CPpmd_State_Ref;
+
+typedef
+  #ifdef PPMD_32BIT
+    void *
+  #else
+    UInt32
+  #endif
+  CPpmd_Void_Ref;
+
+typedef
+  #ifdef PPMD_32BIT
+    Byte *
+  #else
+    UInt32
+  #endif
+  CPpmd_Byte_Ref;
+
+#define PPMD_SetAllBitsIn256Bytes(p) \
+  { size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \
+  p[z+7] = p[z+6] = p[z+5] = p[z+4] = p[z+3] = p[z+2] = p[z+1] = p[z+0] = ~(size_t)0; }}
+
+EXTERN_C_END
+ 
+#endif
diff --git a/libraries/lzma/C/Ppmd7.c b/libraries/lzma/C/Ppmd7.c
new file mode 100644
index 000000000..470aadccf
--- /dev/null
+++ b/libraries/lzma/C/Ppmd7.c
@@ -0,0 +1,712 @@
+/* Ppmd7.c -- PPMdH codec
+2018-07-04 : Igor Pavlov : Public domain
+This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+#include "Ppmd7.h"
+
+const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
+static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
+
+#define MAX_FREQ 124
+#define UNIT_SIZE 12
+
+#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE)
+#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1])
+#define I2U(indx) (p->Indx2Units[indx])
+
+#ifdef PPMD_32BIT
+  #define REF(ptr) (ptr)
+#else
+  #define REF(ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
+#endif
+
+#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr))
+
+#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
+#define STATS(ctx) Ppmd7_GetStats(p, ctx)
+#define ONE_STATE(ctx) Ppmd7Context_OneState(ctx)
+#define SUFFIX(ctx) CTX((ctx)->Suffix)
+
+typedef CPpmd7_Context * CTX_PTR;
+
+struct CPpmd7_Node_;
+
+typedef
+  #ifdef PPMD_32BIT
+    struct CPpmd7_Node_ *
+  #else
+    UInt32
+  #endif
+  CPpmd7_Node_Ref;
+
+typedef struct CPpmd7_Node_
+{
+  UInt16 Stamp; /* must be at offset 0 as CPpmd7_Context::NumStats. Stamp=0 means free */
+  UInt16 NU;
+  CPpmd7_Node_Ref Next; /* must be at offset >= 4 */
+  CPpmd7_Node_Ref Prev;
+} CPpmd7_Node;
+
+#ifdef PPMD_32BIT
+  #define NODE(ptr) (ptr)
+#else
+  #define NODE(offs) ((CPpmd7_Node *)(p->Base + (offs)))
+#endif
+
+/* Marks the model as unallocated and fills the constant lookup tables. */
+void Ppmd7_Construct(CPpmd7 *p)
+{
+  unsigned idx, units = 0, val = 3, run = 1;
+  p->Base = 0;
+  for (idx = 0; idx < PPMD_NUM_INDEXES; idx++)
+  {
+    unsigned end = units + (idx >= 12 ? 4 : (idx >> 2) + 1); /* step is 1, 2, 3 for the first three groups of four indexes, then 4 */
+    while (units < end)
+      p->Units2Indx[units++] = (Byte)idx;
+    p->Indx2Units[idx] = (Byte)units;
+  }
+
+  p->NS2BSIndx[0] = (0 << 1);
+  p->NS2BSIndx[1] = (1 << 1);
+  memset(p->NS2BSIndx + 2, (2 << 1), 9);
+  memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11);
+
+  for (idx = 0; idx < 3; idx++)
+    p->NS2Indx[idx] = (Byte)idx;
+  for (; idx < 256; idx++) /* from here each value v >= 3 fills v - 2 consecutive entries */
+  {
+    p->NS2Indx[idx] = (Byte)val;
+    if (--run == 0)
+      run = (++val) - 2;
+  }
+
+  memset(p->HB2Flag, 0, 0x40);
+  memset(p->HB2Flag + 0x40, 8, 0x100 - 0x40);
+}
+
+void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc)
+{ /* Releases the model memory and returns the object to the unallocated state (Base = 0, Size = 0). */
+  ISzAlloc_Free(alloc, p->Base);
+  p->Base = 0;
+  p->Size = 0;
+}
+
+BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc)
+{ /* (Re)allocates the model memory for the requested size; an existing buffer of the same size is kept. Returns False when the allocation fails, True otherwise. */
+  if (!p->Base || p->Size != size)
+  {
+    size_t size2;
+    Ppmd7_Free(p, alloc);
+    size2 = 0 /* extra bytes past the model area: one UNIT_SIZE in non-32-bit builds, none otherwise */
+      #ifndef PPMD_32BIT
+      + UNIT_SIZE
+      #endif
+      ;
+    p->AlignOffset =
+      #ifdef PPMD_32BIT
+        (4 - size) & 3;
+      #else
+        4 - (size & 3);
+      #endif
+    if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size + size2)) == 0)
+      return False;
+    p->Size = size; /* recorded only after a successful allocation */
+  }
+  return True;
+}
+
+static void InsertNode(CPpmd7 *p, void *node, unsigned indx)
+{ /* Pushes node onto free list indx; the previous list head is stored in the node's first bytes. */
+  ((CPpmd_Void_Ref *)node)[0] = p->FreeList[indx];
+  p->FreeList[indx] = REF(node);
+}
+
+static void *RemoveNode(CPpmd7 *p, unsigned indx)
+{ /* Pops and returns the head of free list indx; the caller must know the list is not empty. */
+  CPpmd_Void_Ref *head = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]);
+  p->FreeList[indx] = head[0];
+  return head;
+}
+
+static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
+{ /* Keeps the first I2U(newIndx) units of a block of I2U(oldIndx) units and puts the tail on the free lists. */
+  Byte *tail = (Byte *)ptr + U2B(I2U(newIndx));
+  unsigned rest = I2U(oldIndx) - I2U(newIndx), indx = U2I(rest);
+  if (I2U(indx) != rest)
+  {
+    unsigned head = I2U(--indx); /* no size class of exactly rest units: free the tail as two pieces */
+    InsertNode(p, tail + U2B(head), rest - head - 1);
+  }
+  InsertNode(p, tail, indx);
+}
+
+static void GlueFreeBlocks(CPpmd7 *p)
+{ /* Defragments the allocator: links every free block into one doubly-linked list, merges blocks that are adjacent in memory, then redistributes the result over the per-size free lists. */
+  #ifdef PPMD_32BIT
+  CPpmd7_Node headItem;
+  CPpmd7_Node_Ref head = &headItem;
+  #else
+  CPpmd7_Node_Ref head = p->AlignOffset + p->Size; /* list head sits at this offset from Base, i.e. just past the model area */
+  #endif
+  
+  CPpmd7_Node_Ref n = head;
+  unsigned i;
+
+  p->GlueCount = 255; /* AllocUnitsRare glues again only once it has counted this back down to 0 */
+
+  /* create doubly-linked list of free blocks */
+  for (i = 0; i < PPMD_NUM_INDEXES; i++)
+  {
+    UInt16 nu = I2U(i);
+    CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i];
+    p->FreeList[i] = 0; /* the per-size lists are rebuilt in the last phase */
+    while (next != 0)
+    {
+      CPpmd7_Node *node = NODE(next);
+      node->Next = n;
+      n = NODE(n)->Prev = next;
+      next = *(const CPpmd7_Node_Ref *)node; /* free-list link stored at the start of the block by InsertNode */
+      node->Stamp = 0; /* Stamp 0 marks the block as free */
+      node->NU = (UInt16)nu;
+    }
+  }
+  NODE(head)->Stamp = 1; /* non-zero Stamp: merging stops at the head */
+  NODE(head)->Next = n;
+  NODE(n)->Prev = head;
+  if (p->LoUnit != p->HiUnit)
+    ((CPpmd7_Node *)p->LoUnit)->Stamp = 1; /* keeps merging from running into the LoUnit..HiUnit gap */
+  
+  /* Glue free blocks */
+  while (n != head)
+  {
+    CPpmd7_Node *node = NODE(n);
+    UInt32 nu = (UInt32)node->NU;
+    for (;;)
+    {
+      CPpmd7_Node *node2 = NODE(n) + nu; /* the block that follows this one in memory */
+      nu += node2->NU;
+      if (node2->Stamp != 0 || nu >= 0x10000) /* stop at a non-free block or when NU would no longer fit in 16 bits */
+        break;
+      NODE(node2->Prev)->Next = node2->Next;
+      NODE(node2->Next)->Prev = node2->Prev;
+      node->NU = (UInt16)nu;
+    }
+    n = node->Next;
+  }
+  
+  /* Fill lists of free blocks */
+  for (n = NODE(head)->Next; n != head;)
+  {
+    CPpmd7_Node *node = NODE(n);
+    unsigned nu;
+    CPpmd7_Node_Ref next = node->Next; /* read before InsertNode overwrites the start of the node */
+    for (nu = node->NU; nu > 128; nu -= 128, node += 128)
+      InsertNode(p, node, PPMD_NUM_INDEXES - 1); /* chop off 128-unit pieces into the last list */
+    if (I2U(i = U2I(nu)) != nu)
+    {
+      unsigned k = I2U(--i); /* no exact size class: split the remainder into two pieces */
+      InsertNode(p, node + k, nu - k - 1);
+    }
+    InsertNode(p, node, i);
+    n = next;
+  }
+}
+
+/* Slow-path allocation of I2U(indx) units; returns NULL when no memory is left. */
+static void *AllocUnitsRare(CPpmd7 *p, unsigned indx)
+{
+  unsigned bigger;
+  if (p->GlueCount == 0)
+  {
+    GlueFreeBlocks(p);
+    if (p->FreeList[indx] != 0)
+      return RemoveNode(p, indx);
+  }
+  for (bigger = indx + 1; bigger != PPMD_NUM_INDEXES; bigger++)
+  {
+    if (p->FreeList[bigger] != 0)
+    {
+      void *block = RemoveNode(p, bigger); /* take a larger free block and give back its tail */
+      SplitBlock(p, block, bigger, indx);
+      return block;
+    }
+  }
+  {
+    UInt32 numBytes = U2B(I2U(indx)); /* nothing free: carve from the gap between Text and UnitsStart */
+    p->GlueCount--;
+    return ((UInt32)(p->UnitsStart - p->Text) > numBytes) ? (p->UnitsStart -= numBytes) : (NULL);
+  }
+}
+
+/* Allocates I2U(indx) units: free list first, then the LoUnit..HiUnit gap, then the slow path. */
+static void *AllocUnits(CPpmd7 *p, unsigned indx)
+{
+  if (p->FreeList[indx] == 0)
+  {
+    const UInt32 need = U2B(I2U(indx));
+    Byte *lo = p->LoUnit;
+    if (need > (UInt32)(p->HiUnit - lo))
+      return AllocUnitsRare(p, indx);
+    p->LoUnit = lo + need;
+    return lo;
+  }
+  return RemoveNode(p, indx);
+}
+
+#define MyMem12Cpy(dest, src, num) \
+  { UInt32 *d = (UInt32 *)dest; const UInt32 *s = (const UInt32 *)src; UInt32 n = num; \
+    do { d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; s += 3; d += 3; } while (--n); }
+
+static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU)
+{ /* Shrinks a block from oldNU to newNU units and returns the block, which may have moved. */
+  const unsigned fromIndx = U2I(oldNU), toIndx = U2I(newNU);
+  void *moved;
+  if (fromIndx == toIndx)
+    return oldPtr;
+  if (p->FreeList[toIndx] == 0)
+  {
+    SplitBlock(p, oldPtr, fromIndx, toIndx); /* shrink in place and free the tail */
+    return oldPtr;
+  }
+  moved = RemoveNode(p, toIndx); /* a block of the target size is free: copy into it and free the old one */
+  MyMem12Cpy(moved, oldPtr, newNU);
+  InsertNode(p, oldPtr, fromIndx);
+  return moved;
+}
+
+#define SUCCESSOR(p) ((CPpmd_Void_Ref)((p)->SuccessorLow | ((UInt32)(p)->SuccessorHigh << 16)))
+
+static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
+{ /* Stores the reference v, taken as a 32-bit value, in the state's two 16-bit successor fields. */
+  p->SuccessorHigh = (UInt16)(((UInt32)(v) >> 16) & 0xFFFF);
+  p->SuccessorLow = (UInt16)((UInt32)(v) & 0xFFFF);
+}
+
+static void RestartModel(CPpmd7 *p)
+{ /* Resets the whole model: empties the free lists, re-partitions the memory, creates the root context with 256 symbols of frequency 1, and re-seeds the BinSumm and See tables. */
+  unsigned i, k, m;
+
+  memset(p->FreeList, 0, sizeof(p->FreeList));
+  p->Text = p->Base + p->AlignOffset;
+  p->HiUnit = p->Text + p->Size;
+  p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE; /* units area: about 7/8 of Size, a whole number of units, at the top of the memory */
+  p->GlueCount = 0;
+
+  p->OrderFall = p->MaxOrder;
+  p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1;
+  p->PrevSuccess = 0;
+
+  p->MinContext = p->MaxContext = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
+  p->MinContext->Suffix = 0; /* the root context has no suffix */
+  p->MinContext->NumStats = 256;
+  p->MinContext->SummFreq = 256 + 1;
+  p->FoundState = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
+  p->LoUnit += U2B(256 / 2); /* 128 units reserved for the 256 states */
+  p->MinContext->Stats = REF(p->FoundState);
+  for (i = 0; i < 256; i++)
+  {
+    CPpmd_State *s = &p->FoundState[i];
+    s->Symbol = (Byte)i;
+    s->Freq = 1;
+    SetSuccessor(s, 0);
+  }
+
+  for (i = 0; i < 128; i++)
+    for (k = 0; k < 8; k++)
+    {
+      UInt16 *dest = p->BinSumm[i] + k;
+      UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2));
+      for (m = 0; m < 64; m += 8)
+        dest[m] = val; /* the same value goes to columns k, k + 8, ..., k + 56 */
+    }
+  
+  for (i = 0; i < 25; i++)
+    for (k = 0; k < 16; k++)
+    {
+      CPpmd_See *s = &p->See[i][k];
+      s->Summ = (UInt16)((5 * i + 10) << (s->Shift = PPMD_PERIOD_BITS - 4)); /* Shift is assigned inside the expression and used as the shift count */
+      s->Count = 4;
+    }
+}
+
+void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
+{ /* Starts a fresh model of order maxOrder; RestartModel writes into the model memory, so it must already be allocated. */
+  p->DummySee.Count = 64; /* unused */
+  p->DummySee.Summ = 0; /* unused */
+  p->DummySee.Shift = PPMD_PERIOD_BITS;
+  p->MaxOrder = maxOrder;
+  RestartModel(p);
+}
+
+static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
+{ /* Creates the chain of single-state child contexts for the symbol in FoundState. Returns the last context created, an already existing context when nothing has to be created, or NULL when memory runs out. */
+  CPpmd_State upState;
+  CTX_PTR c = p->MinContext;
+  CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
+  CPpmd_State *ps[PPMD7_MAX_ORDER]; /* states whose successor will be pointed at the new contexts */
+  unsigned numPs = 0;
+  
+  if (!skip)
+    ps[numPs++] = p->FoundState;
+  
+  while (c->Suffix)
+  {
+    CPpmd_Void_Ref successor;
+    CPpmd_State *s;
+    c = SUFFIX(c);
+    if (c->NumStats != 1)
+    {
+      for (s = STATS(c); s->Symbol != p->FoundState->Symbol; s++); /* unbounded search: relies on the symbol being present in the suffix context */
+    }
+    else
+      s = ONE_STATE(c);
+    successor = SUCCESSOR(s);
+    if (successor != upBranch)
+    {
+      c = CTX(successor); /* this suffix already has its own successor context */
+      if (numPs == 0)
+        return c;
+      break;
+    }
+    ps[numPs++] = s;
+  }
+  
+  upState.Symbol = *(const Byte *)Ppmd7_GetPtr(p, upBranch); /* the byte that upBranch refers to */
+  SetSuccessor(&upState, upBranch + 1);
+  
+  if (c->NumStats == 1)
+    upState.Freq = ONE_STATE(c)->Freq;
+  else
+  {
+    UInt32 cf, s0;
+    CPpmd_State *s;
+    for (s = STATS(c); s->Symbol != upState.Symbol; s++);
+    cf = s->Freq - 1;
+    s0 = c->SummFreq - c->NumStats - cf;
+    upState.Freq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((2 * cf + 3 * s0 - 1) / (2 * s0))));
+  }
+
+  do
+  {
+    /* Create Child */
+    CTX_PTR c1; /* = AllocContext(p); */
+    if (p->HiUnit != p->LoUnit)
+      c1 = (CTX_PTR)(p->HiUnit -= UNIT_SIZE);
+    else if (p->FreeList[0] != 0)
+      c1 = (CTX_PTR)RemoveNode(p, 0);
+    else
+    {
+      c1 = (CTX_PTR)AllocUnitsRare(p, 0);
+      if (!c1)
+        return NULL; /* out of memory; contexts created so far stay linked */
+    }
+    c1->NumStats = 1;
+    *ONE_STATE(c1) = upState;
+    c1->Suffix = REF(c);
+    SetSuccessor(ps[--numPs], REF(c1)); /* ps[] is consumed from the last entry back to the first */
+    c = c1;
+  }
+  while (numPs != 0);
+  
+  return c;
+}
+
+static void SwapStates(CPpmd_State *t1, CPpmd_State *t2)
+{ /* Exchanges the two state records by value. */
+  CPpmd_State saved = *t2;
+  *t2 = *t1;
+  *t1 = saved;
+}
+
+static void UpdateModel(CPpmd7 *p)
+{ /* Updates the model after FoundState was coded in MinContext: bumps the symbol in the suffix context, adds the symbol to every context from MaxContext down to MinContext, and selects the next context. Any allocation failure restarts the model. */
+  CPpmd_Void_Ref successor, fSuccessor = SUCCESSOR(p->FoundState);
+  CTX_PTR c;
+  unsigned s0, ns;
+  
+  if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0)
+  {
+    c = SUFFIX(p->MinContext);
+    
+    if (c->NumStats == 1)
+    {
+      CPpmd_State *s = ONE_STATE(c);
+      if (s->Freq < 32)
+        s->Freq++;
+    }
+    else
+    {
+      CPpmd_State *s = STATS(c);
+      if (s->Symbol != p->FoundState->Symbol)
+      {
+        do { s++; } while (s->Symbol != p->FoundState->Symbol);
+        if (s[0].Freq >= s[-1].Freq)
+        {
+          SwapStates(&s[0], &s[-1]); /* move the symbol one place towards the front */
+          s--;
+        }
+      }
+      if (s->Freq < MAX_FREQ - 9)
+      {
+        s->Freq += 2;
+        c->SummFreq += 2;
+      }
+    }
+  }
+
+  if (p->OrderFall == 0)
+  {
+    p->MinContext = p->MaxContext = CreateSuccessors(p, True);
+    if (p->MinContext == 0)
+    {
+      RestartModel(p); /* CreateSuccessors ran out of memory */
+      return;
+    }
+    SetSuccessor(p->FoundState, REF(p->MinContext));
+    return;
+  }
+  
+  *p->Text++ = p->FoundState->Symbol; /* append the coded symbol to the text area */
+  successor = REF(p->Text);
+  if (p->Text >= p->UnitsStart)
+  {
+    RestartModel(p); /* the text area has reached the units area */
+    return;
+  }
+  
+  if (fSuccessor)
+  {
+    if (fSuccessor <= successor)
+    {
+      CTX_PTR cs = CreateSuccessors(p, False);
+      if (cs == NULL)
+      {
+        RestartModel(p);
+        return;
+      }
+      fSuccessor = REF(cs);
+    }
+    if (--p->OrderFall == 0)
+    {
+      successor = fSuccessor;
+      p->Text -= (p->MaxContext != p->MinContext); /* takes back the byte appended above unless MaxContext == MinContext */
+    }
+  }
+  else
+  {
+    SetSuccessor(p->FoundState, successor);
+    fSuccessor = REF(p->MinContext);
+  }
+  
+  s0 = p->MinContext->SummFreq - (ns = p->MinContext->NumStats) - (p->FoundState->Freq - 1);
+  
+  for (c = p->MaxContext; c != p->MinContext; c = SUFFIX(c))
+  {
+    unsigned ns1;
+    UInt32 cf, sf;
+    if ((ns1 = c->NumStats) != 1)
+    {
+      if ((ns1 & 1) == 0)
+      {
+        /* Expand for one UNIT */
+        unsigned oldNU = ns1 >> 1;
+        unsigned i = U2I(oldNU);
+        if (i != U2I((size_t)oldNU + 1))
+        {
+          void *ptr = AllocUnits(p, i + 1);
+          void *oldPtr;
+          if (!ptr)
+          {
+            RestartModel(p);
+            return;
+          }
+          oldPtr = STATS(c);
+          MyMem12Cpy(ptr, oldPtr, oldNU);
+          InsertNode(p, oldPtr, i); /* the old, smaller block goes back to its free list */
+          c->Stats = STATS_REF(ptr);
+        }
+      }
+      c->SummFreq = (UInt16)(c->SummFreq + (2 * ns1 < ns) + 2 * ((4 * ns1 <= ns) & (c->SummFreq <= 8 * ns1)));
+    }
+    else
+    {
+      CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0); /* single-state context gets a real state array */
+      if (!s)
+      {
+        RestartModel(p);
+        return;
+      }
+      *s = *ONE_STATE(c);
+      c->Stats = REF(s);
+      if (s->Freq < MAX_FREQ / 4 - 1)
+        s->Freq <<= 1;
+      else
+        s->Freq = MAX_FREQ - 4;
+      c->SummFreq = (UInt16)(s->Freq + p->InitEsc + (ns > 3));
+    }
+    cf = 2 * (UInt32)p->FoundState->Freq * (c->SummFreq + 6);
+    sf = (UInt32)s0 + c->SummFreq;
+    if (cf < 6 * sf)
+    {
+      cf = 1 + (cf > sf) + (cf >= 4 * sf);
+      c->SummFreq += 3;
+    }
+    else
+    {
+      cf = 4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf);
+      c->SummFreq = (UInt16)(c->SummFreq + cf);
+    }
+    {
+      CPpmd_State *s = STATS(c) + ns1; /* the new state is appended after the existing ns1 states */
+      SetSuccessor(s, successor);
+      s->Symbol = p->FoundState->Symbol;
+      s->Freq = (Byte)cf;
+      c->NumStats = (UInt16)(ns1 + 1);
+    }
+  }
+  p->MaxContext = p->MinContext = CTX(fSuccessor);
+}
+  
+static void Rescale(CPpmd7 *p)
+{ /* Halves the frequencies of all states in MinContext, keeps the array ordered by descending frequency, removes states whose frequency dropped to 0 and recomputes SummFreq. FoundState ends up as the first state. */
+  unsigned i, adder, sumFreq, escFreq;
+  CPpmd_State *stats = STATS(p->MinContext);
+  CPpmd_State *s = p->FoundState;
+  {
+    CPpmd_State tmp = *s;
+    for (; s != stats; s--)
+      s[0] = s[-1];
+    *s = tmp; /* FoundState is moved to the front of the array */
+  }
+  escFreq = p->MinContext->SummFreq - s->Freq;
+  s->Freq += 4;
+  adder = (p->OrderFall != 0); /* round up when halving unless OrderFall is 0 */
+  s->Freq = (Byte)((s->Freq + adder) >> 1);
+  sumFreq = s->Freq;
+  
+  i = p->MinContext->NumStats - 1;
+  do
+  {
+    escFreq -= (++s)->Freq;
+    s->Freq = (Byte)((s->Freq + adder) >> 1);
+    sumFreq += s->Freq;
+    if (s[0].Freq > s[-1].Freq)
+    {
+      CPpmd_State *s1 = s;
+      CPpmd_State tmp = *s1;
+      do
+        s1[0] = s1[-1];
+      while (--s1 != stats && tmp.Freq > s1[-1].Freq); /* insertion step that restores descending order */
+      *s1 = tmp;
+    }
+  }
+  while (--i);
+  
+  if (s->Freq == 0)
+  {
+    unsigned numStats = p->MinContext->NumStats;
+    unsigned n0, n1;
+    do { i++; } while ((--s)->Freq == 0); /* i counts the zero-frequency states at the end (i is 0 on entry) */
+    escFreq += i;
+    p->MinContext->NumStats = (UInt16)(p->MinContext->NumStats - i);
+    if (p->MinContext->NumStats == 1)
+    {
+      CPpmd_State tmp = *stats;
+      do
+      {
+        tmp.Freq = (Byte)(tmp.Freq - (tmp.Freq >> 1));
+        escFreq >>= 1;
+      }
+      while (escFreq > 1);
+      InsertNode(p, stats, U2I(((numStats + 1) >> 1))); /* the state array is freed; the remaining state lives inside the context */
+      *(p->FoundState = ONE_STATE(p->MinContext)) = tmp;
+      return;
+    }
+    n0 = (numStats + 1) >> 1;
+    n1 = (p->MinContext->NumStats + 1) >> 1;
+    if (n0 != n1)
+      p->MinContext->Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
+  }
+  p->MinContext->SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
+  p->FoundState = STATS(p->MinContext);
+}
+
+/* Picks the SEE record for an escape from MinContext and stores the escape frequency in *escFreq. */
+CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
+{
+  const CPpmd7_Context *mc = p->MinContext;
+  unsigned nonMasked = mc->NumStats - numMasked;
+  CPpmd_See *see;
+  unsigned r;
+
+  if (mc->NumStats == 256)
+  {
+    *escFreq = 1; /* a full 256-symbol context uses the dummy record */
+    return &p->DummySee;
+  }
+
+  see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] +
+      (nonMasked < (unsigned)SUFFIX(mc)->NumStats - mc->NumStats) +
+      2 * (unsigned)(mc->SummFreq < 11 * mc->NumStats) +
+      4 * (unsigned)(numMasked > nonMasked) +
+      p->HiBitsFlag;
+  r = (see->Summ >> see->Shift);
+  see->Summ = (UInt16)(see->Summ - r);
+  *escFreq = r + (r == 0); /* never report an escape frequency of 0 */
+  return see;
+}
+
+static void NextContext(CPpmd7 *p)
+{ /* Jumps straight to FoundState's successor when OrderFall is 0 and the successor lies above Text; otherwise runs the full model update. */
+  CTX_PTR next = CTX(SUCCESSOR(p->FoundState));
+  if (p->OrderFall != 0 || (Byte *)next <= p->Text)
+    UpdateModel(p);
+  else
+    p->MinContext = p->MaxContext = next;
+}
+
+void Ppmd7_Update1(CPpmd7 *p)
+{ /* Adds 4 to FoundState's frequency and to SummFreq, moves the state one place forward if it overtook its predecessor, then advances the context. */
+  CPpmd_State *cur = p->FoundState;
+  cur->Freq += 4;
+  p->MinContext->SummFreq += 4;
+  if (cur[0].Freq > cur[-1].Freq)
+  {
+    SwapStates(cur - 1, cur);
+    p->FoundState = cur - 1;
+    if (cur[-1].Freq > MAX_FREQ)
+      Rescale(p); /* only checked when the state moved forward */
+  }
+  NextContext(p);
+}
+
+void Ppmd7_Update1_0(CPpmd7 *p)
+{ /* Records whether FoundState held more than half of SummFreq (PrevSuccess, added to RunLength), adds 4 to both counters, rescales past MAX_FREQ, then advances the context. */
+  p->PrevSuccess = (p->MinContext->SummFreq < 2 * p->FoundState->Freq);
+  p->RunLength += p->PrevSuccess;
+  p->MinContext->SummFreq += 4;
+  if ((p->FoundState->Freq += 4) > MAX_FREQ)
+    Rescale(p);
+  NextContext(p);
+}
+
+void Ppmd7_UpdateBin(CPpmd7 *p)
+{ /* Increments FoundState's frequency while it is below 128, marks the success, lengthens the run and advances the context. */
+  p->FoundState->Freq += (p->FoundState->Freq < 128);
+  p->RunLength++;
+  p->PrevSuccess = 1;
+  NextContext(p);
+}
+
+void Ppmd7_Update2(CPpmd7 *p)
+{ /* Resets RunLength to InitRL, adds 4 to SummFreq and to FoundState's frequency, rescales past MAX_FREQ, then updates the model. */
+  p->RunLength = p->InitRL;
+  p->MinContext->SummFreq += 4;
+  if ((p->FoundState->Freq += 4) > MAX_FREQ)
+    Rescale(p);
+  UpdateModel(p);
+}
diff --git a/libraries/lzma/C/Ppmd7.h b/libraries/lzma/C/Ppmd7.h
new file mode 100644
index 000000000..610539a04
--- /dev/null
+++ b/libraries/lzma/C/Ppmd7.h
@@ -0,0 +1,142 @@
+/* Ppmd7.h -- PPMdH compression codec
+2018-07-04 : Igor Pavlov : Public domain
+This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
+/* This code supports virtual RangeDecoder and includes the implementation
+of RangeCoder from 7z, instead of RangeCoder from original PPMd var.H.
+If you need the compatibility with original PPMd var.H, you can use external RangeDecoder */
+
+#ifndef __PPMD7_H
+#define __PPMD7_H
+
+#include "Ppmd.h"
+
+EXTERN_C_BEGIN
+
+#define PPMD7_MIN_ORDER 2
+#define PPMD7_MAX_ORDER 64
+
+#define PPMD7_MIN_MEM_SIZE (1 << 11)
+#define PPMD7_MAX_MEM_SIZE (0xFFFFFFFF - 12 * 3)
+
+struct CPpmd7_Context_;
+
+typedef
+  #ifdef PPMD_32BIT
+    struct CPpmd7_Context_ *  /* reference is a direct pointer */
+  #else
+    UInt32  /* reference is an offset that Ppmd7_GetPtr adds to CPpmd7::Base */
+  #endif
+  CPpmd7_Context_Ref;
+
+typedef struct CPpmd7_Context_
+{
+  UInt16 NumStats;  /* number of states; 1 selects the single-state (binary) decode path */
+  UInt16 SummFreq;  /* total passed to the range decoder for multi-state contexts */
+  CPpmd_State_Ref Stats;  /* resolved to a CPpmd_State array by Ppmd7_GetStats */
+  CPpmd7_Context_Ref Suffix;  /* next context up the chain; the decoder returns -1 when it is zero */
+} CPpmd7_Context;
+
+#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq)  /* views the bytes from SummFreq on as one CPpmd_State */
+
+typedef struct
+{
+  CPpmd7_Context *MinContext, *MaxContext;
+  CPpmd_State *FoundState;  /* state selected by the most recent coding step */
+  unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, HiBitsFlag;
+  Int32 RunLength, InitRL; /* must be 32-bit at least (Ppmd7_GetBinSumm tests bit 31 via >> 26) */
+
+  UInt32 Size;
+  UInt32 GlueCount;
+  Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;  /* Base: origin for offset references (Ppmd7_GetPtr) */
+  UInt32 AlignOffset;
+
+  Byte Indx2Units[PPMD_NUM_INDEXES];
+  Byte Units2Indx[128];
+  CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
+  Byte NS2Indx[256], NS2BSIndx[256], HB2Flag[256];  /* first two are indexed by a state count - 1, HB2Flag by symbol */
+  CPpmd_See DummySee, See[25][16];  /* DummySee is what Ppmd7_MakeEscFreq returns for 256-state contexts */
+  UInt16 BinSumm[128][64];  /* entry selected by Ppmd7_GetBinSumm; first index is Freq - 1 */
+} CPpmd7;
+
+void Ppmd7_Construct(CPpmd7 *p);
+BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc);
+void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc);
+void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder);
+#define Ppmd7_WasAllocated(p) ((p)->Base != NULL)
+
+
+/* ---------- Internal Functions ---------- */
+
+extern const Byte PPMD7_kExpEscape[16];
+
+#ifdef PPMD_32BIT  /* references are already pointers: the accessors pass them through */
+  #define Ppmd7_GetPtr(p, ptr) (ptr)
+  #define Ppmd7_GetContext(p, ptr) (ptr)
+  #define Ppmd7_GetStats(p, ctx) ((ctx)->Stats)
+#else  /* references are byte offsets from (p)->Base */
+  #define Ppmd7_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
+  #define Ppmd7_GetContext(p, offs) ((CPpmd7_Context *)Ppmd7_GetPtr((p), (offs)))
+  #define Ppmd7_GetStats(p, ctx) ((CPpmd_State *)Ppmd7_GetPtr((p), ((ctx)->Stats)))
+#endif
+
+void Ppmd7_Update1(CPpmd7 *p);
+void Ppmd7_Update1_0(CPpmd7 *p);
+void Ppmd7_Update2(CPpmd7 *p);
+void Ppmd7_UpdateBin(CPpmd7 *p);
+
+#define Ppmd7_GetBinSumm(p) /* yields the address of a BinSumm entry; also assigns p->HiBitsFlag as a side effect */ \
+    &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1][p->PrevSuccess + \
+    p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] + \
+    (p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]) + \
+    2 * p->HB2Flag[(unsigned)Ppmd7Context_OneState(p->MinContext)->Symbol] + \
+    ((p->RunLength >> 26) & 0x20)]
+
+CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale);
+
+
+/* ---------- Decode ---------- */
+
+typedef struct IPpmd7_RangeDec IPpmd7_RangeDec;
+
+struct IPpmd7_RangeDec
+{
+  UInt32 (*GetThreshold)(const IPpmd7_RangeDec *p, UInt32 total);  /* value the decoder compares against cumulative frequencies */
+  void (*Decode)(const IPpmd7_RangeDec *p, UInt32 start, UInt32 size);  /* commits the interval [start, start + size) */
+  UInt32 (*DecodeBit)(const IPpmd7_RangeDec *p, UInt32 size0);  /* binary decision; the decoder treats 0 as "symbol found" */
+};
+
+typedef struct
+{
+  IPpmd7_RangeDec vt;  /* callbacks; the 7z implementation recovers this struct from &vt via CONTAINER_FROM_VTBL */
+  UInt32 Range;  /* set to 0xFFFFFFFF by Ppmd7z_RangeDec_Init */
+  UInt32 Code;  /* filled from the stream; Ppmd7z_RangeDec_IsFinishedOK expects 0 at the end */
+  IByteIn *Stream;  /* byte source read through IByteIn_Read */
+} CPpmd7z_RangeDec;
+
+void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p);
+BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p);
+#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
+
+int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc);
+
+
+/* ---------- Encode ---------- */
+
+typedef struct  /* range-encoder state passed to the Ppmd7z_RangeEnc_* functions and Ppmd7_EncodeSymbol */
+{
+  UInt64 Low;
+  UInt32 Range;
+  Byte Cache;
+  UInt64 CacheSize;
+  IByteOut *Stream;  /* NOTE(review): presumably the output byte sink - confirm against the encoder source */
+} CPpmd7z_RangeEnc;
+
+void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p);
+void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p);
+
+void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol);
+
+EXTERN_C_END
+ 
+#endif
diff --git a/libraries/lzma/C/Ppmd7Dec.c b/libraries/lzma/C/Ppmd7Dec.c
new file mode 100644
index 000000000..311e9f9dd
--- /dev/null
+++ b/libraries/lzma/C/Ppmd7Dec.c
@@ -0,0 +1,191 @@
+/* Ppmd7Dec.c -- PPMdH Decoder
+2018-07-04 : Igor Pavlov : Public domain
+This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
+#include "Precomp.h"
+
+#include "Ppmd7.h"
+
+#define kTopValue (1 << 24)
+
+BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p)
+{
+  unsigned remaining = 4;
+  p->Code = 0;
+  p->Range = 0xFFFFFFFF;
+  if (IByteIn_Read(p->Stream) != 0)  /* the first stream byte must be zero */
+    return False;
+  while (remaining-- != 0)  /* next four bytes form Code, first byte most significant */
+    p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
+  return (p->Code != 0xFFFFFFFF);
+}
+
+#define GET_Ppmd7z_RangeDec CPpmd7z_RangeDec *p = CONTAINER_FROM_VTBL(pp, CPpmd7z_RangeDec, vt);
+ 
+static UInt32 Range_GetThreshold(const IPpmd7_RangeDec *pp, UInt32 total)
+{
+  CPpmd7z_RangeDec *dec = CONTAINER_FROM_VTBL(pp, CPpmd7z_RangeDec, vt);
+  return dec->Code / (dec->Range /= total);  /* Range is first reduced to the width of one unit of total */
+}
+
+/* Renormalize the decoder state.  While Range is below kTopValue, one more
+   input byte is shifted into the low end of Code and Range is widened by
+   8 bits.  A single call consumes at most two bytes, even if Range is still
+   below kTopValue afterwards. */
+static void Range_Normalize(CPpmd7z_RangeDec *p)
+{
+  unsigned step;
+  for (step = 0; step < 2 && p->Range < kTopValue; step++)
+  {
+    p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
+    p->Range <<= 8;
+  }
+}
+
+static void Range_Decode(const IPpmd7_RangeDec *pp, UInt32 start, UInt32 size)
+{
+  CPpmd7z_RangeDec *dec = CONTAINER_FROM_VTBL(pp, CPpmd7z_RangeDec, vt);
+  dec->Code = dec->Code - start * dec->Range;  /* drop everything below the chosen interval */
+  dec->Range = dec->Range * size;              /* keep only the chosen interval's width */
+  Range_Normalize(dec);
+}
+
+/* Decode one binary decision.  The split point is (Range >> 14) * size0;
+   returns 0 when Code lies below it and 1 otherwise, then renormalizes. */
+static UInt32 Range_DecodeBit(const IPpmd7_RangeDec *pp, UInt32 size0)
+{
+  CPpmd7z_RangeDec *dec = CONTAINER_FROM_VTBL(pp, CPpmd7z_RangeDec, vt);
+  const UInt32 split = (dec->Range >> 14) * size0;
+  const UInt32 bit = (dec->Code >= split);
+  if (bit)
+  {
+    dec->Code -= split;
+    dec->Range -= split;
+  }
+  else
+  {
+    dec->Range = split;
+  }
+  Range_Normalize(dec);
+  return bit;
+}
+
+void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p)
+{
+  p->vt.DecodeBit = Range_DecodeBit;        /* binary decisions */
+  p->vt.Decode = Range_Decode;              /* commit a decoded interval */
+  p->vt.GetThreshold = Range_GetThreshold;  /* Code scaled against a frequency total */
+}
+
+
+#define MASK(sym) ((signed char *)charMask)[sym]  /* per-symbol mask byte inside the caller's local charMask buffer */
+/* Decodes one symbol: returns its byte value, -1 when the suffix chain ends, or -2 when the decoded count is not below the frequency total. */
+int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc)
+{
+  size_t charMask[256 / sizeof(size_t)];  /* 256 bytes of storage, accessed one byte per symbol through MASK() */
+  if (p->MinContext->NumStats != 1)
+  {
+    CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
+    unsigned i;
+    UInt32 count, hiCnt;
+    if ((count = rc->GetThreshold(rc, p->MinContext->SummFreq)) < (hiCnt = s->Freq))  /* first state matches */
+    {
+      Byte symbol;
+      rc->Decode(rc, 0, s->Freq);
+      p->FoundState = s;
+      symbol = s->Symbol;
+      Ppmd7_Update1_0(p);
+      return symbol;
+    }
+    p->PrevSuccess = 0;
+    i = p->MinContext->NumStats - 1;
+    do  /* scan the remaining states, accumulating frequencies in hiCnt */
+    {
+      if ((hiCnt += (++s)->Freq) > count)
+      {
+        Byte symbol;
+        rc->Decode(rc, hiCnt - s->Freq, s->Freq);
+        p->FoundState = s;
+        symbol = s->Symbol;
+        Ppmd7_Update1(p);
+        return symbol;
+      }
+    }
+    while (--i);
+    if (count >= p->MinContext->SummFreq)
+      return -2;
+    p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol];
+    rc->Decode(rc, hiCnt, p->MinContext->SummFreq - hiCnt);  /* escape: the interval above all states */
+    PPMD_SetAllBitsIn256Bytes(charMask);  /* NOTE(review): presumably sets every mask byte to all ones (-1) - confirm in Ppmd.h */
+    MASK(s->Symbol) = 0;  /* zero the mask byte of every symbol of this context, last state first */
+    i = p->MinContext->NumStats - 1;
+    do { MASK((--s)->Symbol) = 0; } while (--i);
+  }
+  else
+  {
+    UInt16 *prob = Ppmd7_GetBinSumm(p);  /* single-state context: one binary decision */
+    if (rc->DecodeBit(rc, *prob) == 0)
+    {
+      Byte symbol;
+      *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
+      symbol = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
+      Ppmd7_UpdateBin(p);
+      return symbol;
+    }
+    *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
+    p->InitEsc = PPMD7_kExpEscape[*prob >> 10];
+    PPMD_SetAllBitsIn256Bytes(charMask);
+    MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;  /* zero the mask byte of the one symbol just rejected */
+    p->PrevSuccess = 0;
+  }
+  for (;;)  /* escape loop: retry in suffix contexts until a symbol is found or an error is returned */
+  {
+    CPpmd_State *ps[256], *s;
+    UInt32 freqSum, count, hiCnt;
+    CPpmd_See *see;
+    unsigned i, num, numMasked = p->MinContext->NumStats;
+    do  /* climb suffix links until NumStats differs from the number of masked symbols */
+    {
+      p->OrderFall++;
+      if (!p->MinContext->Suffix)
+        return -1;
+      p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix);
+    }
+    while (p->MinContext->NumStats == numMasked);
+    hiCnt = 0;
+    s = Ppmd7_GetStats(p, p->MinContext);
+    i = 0;
+    num = p->MinContext->NumStats - numMasked;
+    do  /* collect candidate states in ps[]; a zero mask byte adds nothing and does not advance i */
+    {
+      int k = (int)(MASK(s->Symbol));
+      hiCnt += (s->Freq & k);
+      ps[i] = s++;
+      i -= k;
+    }
+    while (i != num);
+    /* freqSum = escape frequency from Ppmd7_MakeEscFreq + sum of the collected frequencies. */
+    see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum);
+    freqSum += hiCnt;
+    count = rc->GetThreshold(rc, freqSum);
+    /* A count below hiCnt selects one of the collected states; otherwise this is another escape. */
+    if (count < hiCnt)
+    {
+      Byte symbol;
+      CPpmd_State **pps = ps;
+      for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++);  /* empty body: advance to the state whose range holds count */
+      s = *pps;
+      rc->Decode(rc, hiCnt - s->Freq, s->Freq);
+      Ppmd_See_Update(see);
+      p->FoundState = s;
+      symbol = s->Symbol;
+      Ppmd7_Update2(p);
+      return symbol;
+    }
+    if (count >= freqSum)
+      return -2;
+    rc->Decode(rc, hiCnt, freqSum - hiCnt);
+    see->Summ = (UInt16)(see->Summ + freqSum);
+    do { MASK(ps[--i]->Symbol) = 0; } while (i != 0);  /* zero the mask byte of every state collected in this round */
+  }
+}
diff --git a/libraries/lzma/C/Precomp.h b/libraries/lzma/C/Precomp.h
new file mode 100644
index 000000000..e8ff8b40e
--- /dev/null
+++ b/libraries/lzma/C/Precomp.h
@@ -0,0 +1,10 @@
+/* Precomp.h -- StdAfx
+2013-11-12 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_PRECOMP_H
+#define __7Z_PRECOMP_H
+
+#include "Compiler.h"
+/* #include "7zTypes.h" */
+
+#endif
diff --git a/libraries/lzma/C/Threads.c b/libraries/lzma/C/Threads.c
new file mode 100644
index 000000000..930ad271b
--- /dev/null
+++ b/libraries/lzma/C/Threads.c
@@ -0,0 +1,95 @@
+/* Threads.c -- multithreading library
+2017-06-26 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#ifndef UNDER_CE
+#include <process.h>
+#endif
+
+#include "Threads.h"
+
+static WRes GetError()
+{
+  const DWORD code = GetLastError();
+  return (code != 0) ? (WRes)code : 1;  /* a zero last-error is still reported as failure (1) */
+}
+
+static WRes HandleToWRes(HANDLE h) { if (h == NULL) return GetError(); return 0; }  /* NULL handle means failure */
+static WRes BOOLToWRes(BOOL v) { if (!v) return GetError(); return 0; }  /* FALSE means failure */
+
+/* Close *p if it holds a handle and reset it to NULL; a NULL handle is a no-op. */
+WRes HandlePtr_Close(HANDLE *p)
+{
+  if (*p == NULL)
+    return 0;
+  if (!CloseHandle(*p))
+    return GetError();  /* *p is left unchanged when closing fails */
+  *p = NULL;
+  return 0;
+}
+
+WRes Handle_WaitObject(HANDLE h) { const DWORD waitResult = WaitForSingleObject(h, INFINITE); return (WRes)waitResult; }
+
+WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
+{
+  /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
+  /* Starts func(param) on a new thread, stores the handle in *p, and returns 0 or a nonzero error code. */
+  #ifdef UNDER_CE
+  
+  DWORD threadId;
+  *p = CreateThread(0, 0, func, param, 0, &threadId);
+
+  #else
+
+  unsigned threadId;
+  *p = (HANDLE)_beginthreadex(NULL, 0, func, param, 0, &threadId);
+   
+  #endif
+
+  /* maybe we must use errno here, but probably GetLastError() is also OK. */
+  return HandleToWRes(*p);  /* a NULL handle is turned into GetLastError(), or 1 if that is zero */
+}
+
+static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled)
+{
+  const BOOL initialState = signaled ? TRUE : FALSE;  /* any nonzero value means "start signaled" */
+  return HandleToWRes(*p = CreateEvent(NULL, manualReset, initialState, NULL));
+}
+
+WRes Event_Set(CEvent *p) { const BOOL ok = SetEvent(*p); return BOOLToWRes(ok); }
+WRes Event_Reset(CEvent *p) { const BOOL ok = ResetEvent(*p); return BOOLToWRes(ok); }
+
+WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled) { return Event_Create(p, TRUE, signaled); }  /* manual reset */
+WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) { return Event_Create(p, FALSE, signaled); }  /* auto reset */
+WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) { return Event_Create(p, TRUE, 0); }
+WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return Event_Create(p, FALSE, 0); }
+
+
+WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+  /* Both counts are cast to LONG for the Win32 call; *p receives the returned handle. */
+  return HandleToWRes(*p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL));
+}
+
+static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount)
+  { const BOOL ok = ReleaseSemaphore(*p, releaseCount, previousCount); return BOOLToWRes(ok); }
+WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num)
+  { return BOOLToWRes(ReleaseSemaphore(*p, (LONG)num, NULL)); }  /* previous count is not requested */
+WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_Release(p, 1, NULL); }
+
+WRes CriticalSection_Init(CCriticalSection *p)  /* returns 0 on success, 1 if MSVC catches an exception during init */
+{
+  /* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */
+  #ifdef _MSC_VER
+  __try  /* structured exception handling is used only when building with MSVC */
+  #endif
+  {
+    InitializeCriticalSection(p);
+    /* InitializeCriticalSectionAndSpinCount(p, 0); */
+  }
+  #ifdef _MSC_VER
+  __except (EXCEPTION_EXECUTE_HANDLER) { return 1; }  /* any raised exception becomes error code 1 */
+  #endif
+  return 0;
+}
diff --git a/libraries/lzma/C/Threads.h b/libraries/lzma/C/Threads.h
new file mode 100644
index 000000000..e53ace435
--- /dev/null
+++ b/libraries/lzma/C/Threads.h
@@ -0,0 +1,68 @@
+/* Threads.h -- multithreading library
+2017-06-18 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_THREADS_H
+#define __7Z_THREADS_H
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+WRes HandlePtr_Close(HANDLE *h);
+WRes Handle_WaitObject(HANDLE h);
+
+typedef HANDLE CThread;
+#define Thread_Construct(p) *(p) = NULL
+#define Thread_WasCreated(p) (*(p) != NULL)
+#define Thread_Close(p) HandlePtr_Close(p)
+#define Thread_Wait(p) Handle_WaitObject(*(p))
+
+typedef
+#ifdef UNDER_CE
+  DWORD  /* Thread_Create uses CreateThread when UNDER_CE is defined */
+#else
+  unsigned  /* Thread_Create uses _beginthreadex otherwise */
+#endif
+  THREAD_FUNC_RET_TYPE;
+
+#define THREAD_FUNC_CALL_TYPE MY_STD_CALL
+#define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE
+typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *);
+WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param);
+
+typedef HANDLE CEvent;
+typedef CEvent CAutoResetEvent;
+typedef CEvent CManualResetEvent;
+#define Event_Construct(p) *(p) = NULL
+#define Event_IsCreated(p) (*(p) != NULL)
+#define Event_Close(p) HandlePtr_Close(p)
+#define Event_Wait(p) Handle_WaitObject(*(p))
+WRes Event_Set(CEvent *p);
+WRes Event_Reset(CEvent *p);
+WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
+WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
+WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
+WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
+
+typedef HANDLE CSemaphore;
+#define Semaphore_Construct(p) *(p) = NULL
+#define Semaphore_IsCreated(p) (*(p) != NULL)
+#define Semaphore_Close(p) HandlePtr_Close(p)
+#define Semaphore_Wait(p) Handle_WaitObject(*(p))
+WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
+WRes Semaphore_Release1(CSemaphore *p);
+
+typedef CRITICAL_SECTION CCriticalSection;
+WRes CriticalSection_Init(CCriticalSection *p);
+#define CriticalSection_Delete(p) DeleteCriticalSection(p)
+#define CriticalSection_Enter(p) EnterCriticalSection(p)
+#define CriticalSection_Leave(p) LeaveCriticalSection(p)
+
+EXTERN_C_END
+
+#endif
diff --git a/libraries/lzma/CMakeLists.txt b/libraries/lzma/CMakeLists.txt
new file mode 100644
index 000000000..4246a34f8
--- /dev/null
+++ b/libraries/lzma/CMakeLists.txt
@@ -0,0 +1,38 @@
+cmake_minimum_required( VERSION 2.8.7 )
+
+make_release_only()
+
+if( ZD_CMAKE_COMPILER_IS_GNUC_COMPATIBLE )
+	set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -fomit-frame-pointer" )
+endif()
+
+set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_7ZIP_PPMD_SUPPPORT" )
+
+set( LZMA_FILES
+	C/7zArcIn.c
+	C/7zBuf.c
+	C/7zCrc.c
+	C/7zCrcOpt.c
+	C/7zDec.c
+	C/7zStream.c
+	C/Bcj2.c
+	C/Bra.c
+	C/Bra86.c
+	C/BraIA64.c
+	C/CpuArch.c
+	C/Delta.c
+	C/LzFind.c
+	C/Lzma2Dec.c
+	C/LzmaDec.c
+	C/LzmaEnc.c
+	C/Ppmd7.c
+	C/Ppmd7Dec.c )
+
+if( WIN32 )
+	set( LZMA_FILES ${LZMA_FILES} C/LzFindMt.c C/Threads.c )
+else()
+	set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_7ZIP_ST" )
+endif()
+
+add_library( lzma STATIC ${LZMA_FILES} )
+target_link_libraries( lzma )
diff --git a/libraries/lzma/DOC/lzma-history.txt b/libraries/lzma/DOC/lzma-history.txt
new file mode 100644
index 000000000..48ee74813
--- /dev/null
+++ b/libraries/lzma/DOC/lzma-history.txt
@@ -0,0 +1,446 @@
+HISTORY of the LZMA SDK
+-----------------------
+
+19.00          2019-02-21
+-------------------------
+- Encryption strength for 7z archives was increased:
+  the size of random initialization vector was increased from 64-bit to 128-bit,
+  and the pseudo-random number generator was improved.
+- The bug in 7zIn.c code was fixed.
+
+
+18.06          2018-12-30
+-------------------------
+- The speed for LZMA/LZMA2 compressing was increased by 3-10%,
+  and there are minor changes in compression ratio.
+- Some bugs were fixed.
+- The bug in 7-Zip 18.02-18.05 was fixed:
+  There was memory leak in multithreading xz decoder - XzDecMt_Decode(),
+  if xz stream contains only one block.
+- The changes for MSVS compiler makefiles: 
+   - the makefiles now use "PLATFORM" macroname with values (x64, x86, arm64)
+     instead of "CPU" macroname with values (AMD64, ARM64).
+   - the makefiles by default now use static version of the run-time library.
+
+
+18.05          2018-04-30
+-------------------------
+- The speed for LZMA/LZMA2 compressing was increased 
+    by 8% for fastest/fast compression levels and 
+    by 3% for normal/maximum compression levels.
+- Previous versions of 7-Zip could work incorrectly in "Large memory pages" mode in
+  Windows 10 because of some BUG with "Large Pages" in Windows 10. 
+  Now 7-Zip doesn't use "Large Pages" on Windows 10 up to revision 1709 (16299).
+- The BUG was fixed in Lzma2Enc.c
+    Lzma2Enc_Encode2() function worked incorrectly,
+      if (inStream == NULL) and the number of block threads is more than 1.
+
+
+18.03 beta     2018-03-04
+-------------------------
+- Asm\x86\LzmaDecOpt.asm: new optimized LZMA decoder written in asm 
+  for x64 with about 30% higher speed than main version of LZMA decoder written in C.
+- The speed for single-thread LZMA/LZMA2 decoder written in C was increased by 3%.
+- 7-Zip now can use multi-threading for 7z/LZMA2 decoding,
+  if there are multiple independent data chunks in LZMA2 stream.
+- 7-Zip now can use multi-threading for xz decoding,
+  if there are multiple blocks in xz stream.
+
+
+18.01          2018-01-28
+-------------------------
+- The BUG in 17.01 - 18.00 beta was fixed:
+  XzDec.c : random block unpacking and XzUnpacker_IsBlockFinished()
+  didn't work correctly for xz archives without checksum (CRC).
+
+
+18.00 beta     2018-01-10
+-------------------------
+- The BUG in xz encoder was fixed:
+  There was memory leak of 16 KB for each file compressed with 
+  xz compression method, if additional filter was used.
+
+
+17.01 beta     2017-08-28
+-------------------------
+- Minor speed optimization for LZMA2 (xz and 7z) multi-threading compression.
+  7-Zip now uses additional memory buffers for multi-block LZMA2 compression.
+  CPU utilization was slightly improved.
+- 7-zip now creates multi-block xz archives by default. Block size can be 
+  specified with -ms[Size]{m|g} switch.
+- xz decoder now can unpack random block from multi-block xz archives.
+- 7-Zip command line: @listfile now doesn't work after -- switch.
+  Use -i@listfile before -- switch instead.
+- The BUGs were fixed:
+  7-Zip 17.00 beta crashed for commands that write anti-item to 7z archive.
+
+
+17.00 beta     2017-04-29
+-------------------------
+- NewHandler.h / NewHandler.cpp: 
+    now it redefines operator new() only for old MSVC compilers (_MSC_VER < 1900).
+- C/7zTypes.h : the names of variables in interface structures were changed (vt).
+- Some bugs were fixed. 7-Zip could crash in some cases.
+- Some internal changes in code.
+
+
+16.04          2016-10-04
+-------------------------
+- The bug was fixed in DllSecur.c.
+
+
+16.03          2016-09-28
+-------------------------
+- SFX modules now use some protection against DLL preloading attack.
+- Some bugs in 7z code were fixed.
+
+
+16.02          2016-05-21
+-------------------------
+- The BUG in 16.00 - 16.01 was fixed:
+  Split Handler (SplitHandler.cpp) returned incorrect 
+  total size value (kpidSize) for split archives.
+
+
+16.01          2016-05-19
+-------------------------	
+- Some internal changes to reduce the number of compiler warnings.
+
+
+16.00          2016-05-10
+-------------------------	
+- Some bugs were fixed.
+
+
+15.12          2015-11-19
+-------------------------	
+- The BUG in C version of 7z decoder was fixed:
+  7zDec.c : SzDecodeLzma2()
+  7z decoder could mistakenly report about decoding error for some 7z archives
+  that use LZMA2 compression method.
+  The probability to get that mistaken decoding error report was about 
+  one error per 16384 solid blocks for solid blocks larger than 16 KB (compressed size). 
+- The BUG (in 9.26-15.11) in C version of 7z decoder was fixed:
+  7zArcIn.c : SzReadHeader2()
+  7z decoder worked incorrectly for 7z archives that contain 
+  empty solid blocks, that can be placed to 7z archive, if some file is 
+  unavailable for reading during archive creation.
+
+
+15.09 beta     2015-10-16
+-------------------------	
+- The BUG in LZMA / LZMA2 encoding code was fixed.
+  The BUG in LzFind.c::MatchFinder_ReadBlock() function.
+  If input data size is larger than (4 GiB - dictionary_size),
+  the following code worked incorrectly:
+  -  LZMA : LzmaEnc_MemEncode(), LzmaEncode() : LZMA encoding functions 
+     for compressing from memory to memory. 
+     That BUG is not related to LZMA encoder version that works via streams.
+  -  LZMA2 : multi-threaded version of LZMA2 encoder worked incorrectly, if 
+     default value of chunk size (CLzma2EncProps::blockSize) is changed 
+     to value larger than (4 GiB - dictionary_size).
+
+
+9.38 beta      2015-01-03
+-------------------------	
+- The BUG in 9.31-9.37 was fixed:
+  IArchiveGetRawProps interface was disabled for 7z archives.
+- The BUG in 9.26-9.36 was fixed:
+  Some code in CPP\7zip\Archive\7z\ worked correctly only under Windows.
+
+
+9.36 beta      2014-12-26
+-------------------------	
+- The BUG in command line version was fixed:
+  7-Zip created temporary archive in current folder during update archive
+  operation, if -w{Path} switch was not specified. 
+  The fixed 7-Zip creates temporary archive in folder that contains updated archive.
+- The BUG in 9.33-9.35 was fixed:
+  7-Zip silently ignored file reading errors during 7z or gz archive creation,
+  and the created archive contained only part of file that was read before error.
+  The fixed 7-Zip stops archive creation and it reports about error.
+
+
+9.35 beta      2014-12-07
+-------------------------	
+- 7zr.exe now supports AES encryption.
+- SFX modules were added to LZMA SDK
+- Some bugs were fixed.
+
+
+9.21 beta      2011-04-11
+-------------------------	
+- New class FString for file names at file systems.
+- Speed optimization in CRC code for big-endian CPUs.
+- The BUG in Lzma2Dec.c was fixed:
+    Lzma2Decode function didn't work.
+
+
+9.18 beta      2010-11-02
+-------------------------	
+- New small SFX module for installers (SfxSetup).
+
+
+9.12 beta      2010-03-24
+-------------------------
+- The BUG in LZMA SDK 9.* was fixed: LZMA2 codec didn't work,
+  if more than 10 threads were used (or more than 20 threads in some modes).
+
+
+9.11 beta      2010-03-15
+-------------------------
+- PPMd compression method support
+   
+
+9.09           2009-12-12
+-------------------------
+- The bug was fixed:
+   Utf16_To_Utf8 functions in UTFConvert.cpp and 7zMain.c
+   incorrectly converted surrogate characters (the code >= 0x10000) to UTF-8.
+- Some bugs were fixed
+
+
+9.06           2009-08-17
+-------------------------
+- Some changes in ANSI-C 7z Decoder interfaces.
+
+
+9.04           2009-05-30
+-------------------------
+- LZMA2 compression method support
+- xz format support
+
+
+4.65           2009-02-03
+-------------------------
+- Some minor fixes
+
+
+4.63           2008-12-31
+-------------------------
+- Some minor fixes
+
+
+4.61 beta      2008-11-23
+-------------------------
+- The bug in ANSI-C LZMA Decoder was fixed:
+    If encoded stream was corrupted, decoder could access memory 
+    outside of allocated range.
+- Some changes in ANSI-C 7z Decoder interfaces.
+- LZMA SDK is placed in the public domain.
+
+
+4.60 beta      2008-08-19
+-------------------------
+- Some minor fixes.
+
+
+4.59 beta      2008-08-13
+-------------------------
+- The bug was fixed:
+    LZMA Encoder in fast compression mode could access memory outside of 
+    allocated range in some rare cases.
+
+
+4.58 beta      2008-05-05
+-------------------------
+- ANSI-C LZMA Decoder was rewritten for speed optimizations.
+- ANSI-C LZMA Encoder was included to LZMA SDK.
+- C++ LZMA code now is just wrapper over ANSI-C code.
+
+
+4.57           2007-12-12
+-------------------------
+- Speed optimizations in C++ LZMA Decoder. 
+- Small changes for more compatibility with some C/C++ compilers.
+
+
+4.49 beta      2007-07-05
+-------------------------
+- .7z ANSI-C Decoder:
+     - now it supports BCJ and BCJ2 filters
+     - now it supports files larger than 4 GB.
+     - now it supports "Last Write Time" field for files.
+- C++ code for .7z archives compressing/decompressing from 7-zip 
+  was included to LZMA SDK.
+  
+
+4.43           2006-06-04
+-------------------------
+- Small changes for more compatibility with some C/C++ compilers.
+  
+
+4.42           2006-05-15
+-------------------------
+- Small changes in .h files in ANSI-C version.
+  
+
+4.39 beta      2006-04-14
+-------------------------
+- The bug in versions 4.33b:4.38b was fixed:
+  C++ version of LZMA encoder could not correctly compress 
+  files larger than 2 GB with HC4 match finder (-mfhc4).
+  
+
+4.37 beta      2006-04-06
+-------------------------
+- Fixes in C++ code: code could not be compiled if _NO_EXCEPTIONS was defined. 
+
+
+4.35 beta      2006-03-02
+-------------------------
+- The bug was fixed in C++ version of LZMA Decoder:
+    If encoded stream was corrupted, decoder could access memory 
+    outside of allocated range.
+
+
+4.34 beta      2006-02-27
+-------------------------
+- Compressing speed and memory requirements for compressing were increased
+- LZMA now can use only these match finders: HC4, BT2, BT3, BT4
+
+
+4.32           2005-12-09
+-------------------------
+- Java version of LZMA SDK was included
+
+
+4.30           2005-11-20
+-------------------------
+- Compression ratio was improved in -a2 mode
+- Speed optimizations for compressing in -a2 mode
+- -fb switch now supports values up to 273
+- The bug in 7z_C (7zIn.c) was fixed:
+  It used Alloc/Free functions from different memory pools.
+  So if program used two memory pools, it worked incorrectly.
+- 7z_C: .7z format supporting was improved
+- LZMA# SDK (C#.NET version) was included
+
+
+4.27 (Updated) 2005-09-21
+-------------------------
+- Some GUIDs/interfaces in C++ were changed.
+ IStream.h:
+   ISequentialInStream::Read now works as old ReadPart
+   ISequentialOutStream::Write now works as old WritePart
+
+
+4.27           2005-08-07
+-------------------------
+- The bug in LzmaDecodeSize.c was fixed:
+   if _LZMA_IN_CB and _LZMA_OUT_READ were defined,
+   decompressing worked incorrectly.
+
+
+4.26           2005-08-05
+-------------------------
+- Fixes in 7z_C code and LzmaTest.c:
+  previous versions could work incorrectly,
+  if malloc(0) returns 0
+
+
+4.23           2005-06-29
+-------------------------
+- Small fixes in C++ code
+
+
+4.22           2005-06-10
+-------------------------
+- Small fixes
+
+
+4.21           2005-06-08
+-------------------------
+- Interfaces for ANSI-C LZMA Decoder (LzmaDecode.c) were changed
+- New additional version of ANSI-C LZMA Decoder with zlib-like interface:
+    - LzmaStateDecode.h
+    - LzmaStateDecode.c
+    - LzmaStateTest.c
+- ANSI-C LZMA Decoder now can decompress files larger than 4 GB
+
+
+4.17           2005-04-18
+-------------------------
+- New example for RAM->RAM compressing/decompressing: 
+  LZMA + BCJ (filter for x86 code):
+    - LzmaRam.h
+    - LzmaRam.cpp
+    - LzmaRamDecode.h
+    - LzmaRamDecode.c
+    - -f86 switch for lzma.exe
+
+
+4.16           2005-03-29
+-------------------------
+- The bug was fixed in LzmaDecode.c (ANSI-C LZMA Decoder): 
+   If _LZMA_OUT_READ was defined, and if encoded stream was corrupted,
+   decoder could access memory outside of allocated range.
+- Speed optimization of ANSI-C LZMA Decoder (now it's about 20% faster).
+  Old version of LZMA Decoder now is in file LzmaDecodeSize.c. 
+  LzmaDecodeSize.c can provide slightly smaller code than LzmaDecode.c
+- Small speed optimization in LZMA C++ code
+- filter for SPARC's code was added
+- Simplified version of .7z ANSI-C Decoder was included
+
+
+4.06           2004-09-05
+-------------------------
+- The bug in v4.05 was fixed:
+    LZMA-Encoder didn't release output stream in some cases.
+
+
+4.05           2004-08-25
+-------------------------
+- Source code of filters for x86, IA-64, ARM, ARM-Thumb 
+  and PowerPC code was included to SDK
+- Some internal minor changes
+
+
+4.04           2004-07-28
+-------------------------
+- More compatibility with some C++ compilers
+
+
+4.03           2004-06-18
+-------------------------
+- "Benchmark" command was added. It measures compressing 
+  and decompressing speed and shows rating values. 
+  Also it checks hardware errors.
+
+
+4.02           2004-06-10
+-------------------------
+- C++ LZMA Encoder/Decoder code now is more portable
+  and it can be compiled by GCC on Linux.
+
+
+4.01           2004-02-15
+-------------------------
+- Some detection of data corruption was enabled.
+    LzmaDecode.c / RangeDecoderReadByte
+    .....
+    {
+      rd->ExtraBytes = 1;
+      return 0xFF;
+    }
+
+
+4.00           2004-02-13
+-------------------------
+- Original version of LZMA SDK
+
+
+
+HISTORY of the LZMA
+-------------------
+  2001-2008:  Improvements to LZMA compressing/decompressing code, 
+              keeping compatibility with original LZMA format
+  1996-2001:  Development of LZMA compression format
+
+  Some milestones:
+
+  2001-08-30: LZMA compression was added to 7-Zip
+  1999-01-02: First version of 7-Zip was released
+  
+
+End of document
diff --git a/libraries/lzma/DOC/lzma-sdk.txt b/libraries/lzma/DOC/lzma-sdk.txt
new file mode 100644
index 000000000..b0e14a2e2
--- /dev/null
+++ b/libraries/lzma/DOC/lzma-sdk.txt
@@ -0,0 +1,357 @@
+LZMA SDK 19.00
+--------------
+
+LZMA SDK provides the documentation, samples, header files,
+libraries, and tools you need to develop applications that 
+use 7z / LZMA / LZMA2 / XZ compression.
+
+LZMA is an improved version of the famous LZ77 compression algorithm. 
+It was improved to maximize the compression ratio while
+keeping high decompression speed and low memory requirements for 
+decompressing.
+
+LZMA2 is a LZMA based compression method. LZMA2 provides better 
+multithreading support for compression than LZMA and some other improvements.
+
+7z is a file format for data compression and file archiving.
+7z is a main file format for 7-Zip compression program (www.7-zip.org).
+7z format supports different compression methods: LZMA, LZMA2 and others.
+7z also supports AES-256 based encryption.
+
+XZ is a file format for data compression that uses LZMA2 compression.
+XZ format provides additional features: SHA/CRC check, filters for 
+improved compression ratio, and splitting into blocks and streams.
+
+
+
+LICENSE
+-------
+
+LZMA SDK is written and placed in the public domain by Igor Pavlov.
+
+Some code in LZMA SDK is based on public domain code from other developers:
+  1) PPMd var.H (2001): Dmitry Shkarin
+  2) SHA-256: Wei Dai (Crypto++ library)
+
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute the 
+original LZMA SDK code, either in source code form or as a compiled binary, for 
+any purpose, commercial or non-commercial, and by any means.
+
+LZMA SDK code is compatible with open source licenses, for example, you can 
+include it to GNU GPL or GNU LGPL code.
+
+
+LZMA SDK Contents
+-----------------
+
+  Source code:
+
+    - C / C++ / C# / Java   - LZMA compression and decompression
+    - C / C++               - LZMA2 compression and decompression
+    - C / C++               - XZ compression and decompression
+    - C                     - 7z decompression
+    -     C++               - 7z compression and decompression
+    - C                     - small SFXs for installers (7z decompression)
+    -     C++               - SFXs and SFXs for installers (7z decompression)
+
+  Precompiled binaries:
+
+    - console programs for lzma / 7z / xz compression and decompression
+    - SFX modules for installers.
+
+
+UNIX/Linux version 
+------------------
+To compile C++ version of file->file LZMA encoding, go to directory
+CPP/7zip/Bundles/LzmaCon
+and call make to recompile it:
+  make -f makefile.gcc clean all
+
+In some UNIX/Linux versions you must compile LZMA with static libraries.
+To compile with static libraries, you can use 
+LIB = -lm -static
+
+Also you can use p7zip (port of 7-Zip for POSIX systems like Unix or Linux):
+  
+  http://p7zip.sourceforge.net/
+
+
+Files
+-----
+
+DOC/7zC.txt          - 7z ANSI-C Decoder description
+DOC/7zFormat.txt     - 7z Format description
+DOC/installer.txt    - information about 7-Zip for installers
+DOC/lzma.txt         - LZMA compression description
+DOC/lzma-sdk.txt     - LZMA SDK description (this file)
+DOC/lzma-history.txt - history of LZMA SDK
+DOC/lzma-specification.txt - Specification of LZMA
+DOC/Methods.txt      - Compression method IDs for .7z
+
+bin/installer/   - example script to create installer that uses SFX module.
+
+bin/7zdec.exe    - simplified 7z archive decoder
+bin/7zr.exe      - 7-Zip console program (reduced version)
+bin/x64/7zr.exe  - 7-Zip console program (reduced version) (x64 version)
+bin/lzma.exe     - file->file LZMA encoder/decoder for Windows
+bin/7zS2.sfx     - small SFX module for installers (GUI version)
+bin/7zS2con.sfx  - small SFX module for installers (Console version)
+bin/7zSD.sfx     - SFX module for installers.
+
+
+7zDec.exe
+---------
+7zDec.exe is simplified 7z archive decoder.
+It supports only LZMA, LZMA2, and PPMd methods.
+7zDec decodes whole solid block from 7z archive to RAM.
+The RAM consumption can be high.
+
+
+
+
+Source code structure
+---------------------
+
+
+Asm/ - asm files (optimized code for CRC calculation and Intel-AES encryption)
+
+C/  - C files (compression / decompression and other)
+  Util/
+    7z       - 7z decoder program (decoding 7z files)
+    Lzma     - LZMA program (file->file LZMA encoder/decoder).
+    LzmaLib  - LZMA library (.DLL for Windows)
+    SfxSetup - small SFX module for installers 
+
+CPP/ -- CPP files
+
+  Common  - common files for C++ projects
+  Windows - common files for Windows related code
+
+  7zip    - files related to 7-Zip
+
+    Archive - files related to archiving
+
+      Common   - common files for archive handling
+      7z       - 7z C++ Encoder/Decoder
+
+    Bundles  - Modules that are bundles of other modules (files)
+  
+      Alone7z       - 7zr.exe: Standalone 7-Zip console program (reduced version)
+      Format7zExtractR  - 7zxr.dll: Reduced version of 7z DLL: extracting from 7z/LZMA/BCJ/BCJ2.
+      Format7zR         - 7zr.dll:  Reduced version of 7z DLL: extracting/compressing to 7z/LZMA/BCJ/BCJ2
+      LzmaCon       - lzma.exe: LZMA compression/decompression
+      LzmaSpec      - example code for LZMA Specification
+      SFXCon        - 7zCon.sfx: Console 7z SFX module
+      SFXSetup      - 7zS.sfx: 7z SFX module for installers
+      SFXWin        - 7z.sfx: GUI 7z SFX module
+
+    Common   - common files for 7-Zip
+
+    Compress - files for compression/decompression
+
+    Crypto   - files for encryption / decryption
+
+    UI       - User Interface files
+         
+      Client7z - Test application for 7za.dll, 7zr.dll, 7zxr.dll
+      Common   - Common UI files
+      Console  - Code for console program (7z.exe)
+      Explorer    - Some code from 7-Zip Shell extension
+      FileManager - Some GUI code from 7-Zip File Manager
+      GUI         - Some GUI code from 7-Zip
+
+
+CS/ - C# files
+  7zip
+    Common   - some common files for 7-Zip
+    Compress - files related to compression/decompression
+      LZ     - files related to LZ (Lempel-Ziv) compression algorithm
+      LZMA         - LZMA compression/decompression
+      LzmaAlone    - file->file LZMA compression/decompression
+      RangeCoder   - Range Coder (special code of compression/decompression)
+
+Java/  - Java files
+  SevenZip
+    Compression    - files related to compression/decompression
+      LZ           - files related to LZ (Lempel-Ziv) compression algorithm
+      LZMA         - LZMA compression/decompression
+      RangeCoder   - Range Coder (special code of compression/decompression)
+
+
+Note: 
+  Asm / C / C++ source code of LZMA SDK is part of 7-Zip's source code.
+  7-Zip's source code can be downloaded from 7-Zip's SourceForge page:
+
+  http://sourceforge.net/projects/sevenzip/
+
+
+
+LZMA features
+-------------
+  - Variable dictionary size (up to 1 GB)
+  - Estimated compressing speed: about 2 MB/s on 2 GHz CPU
+  - Estimated decompressing speed: 
+      - 20-30 MB/s on modern 2 GHz cpu
+      - 1-2 MB/s on 200 MHz simple RISC cpu: (ARM, MIPS, PowerPC)
+  - Small memory requirements for decompressing (16 KB + DictionarySize)
+  - Small code size for decompressing: 5-8 KB
+
+LZMA decoder uses only integer operations and can be 
+implemented in any modern 32-bit CPU (or on 16-bit CPU with some conditions).
+
+Some critical operations that affect the speed of LZMA decompression:
+  1) 32*16 bit integer multiply
+  2) Mispredicted branches (penalty mostly depends on pipeline length)
+  3) 32-bit shift and arithmetic operations
+
+The speed of LZMA decompression mostly depends on CPU speed.
+Memory speed matters little. But if your CPU has a small data cache,
+the overall weight of memory speed will slightly increase.
+
+
+How To Use
+----------
+
+Using LZMA encoder/decoder executable
+--------------------------------------
+
+Usage:  LZMA <e|d> inputFile outputFile [<switches>...]
+
+  e: encode file
+
+  d: decode file
+
+  b: Benchmark. There are two tests: compressing and decompressing 
+     with LZMA method. Benchmark shows rating in MIPS (million 
+     instructions per second). Rating value is calculated from 
+     measured speed and it is normalized with Intel's Core 2 results.
+     Also Benchmark checks possible hardware errors (RAM 
+     errors in most cases). Benchmark uses these settings:
+     (-a1, -d21, -fb32, -mfbt4). You can change only -d parameter. 
+     Also you can change the number of iterations. Example for 30 iterations:
+       LZMA b 30
+     Default number of iterations is 10.
+
+<Switches>
+  
+
+  -a{N}:  set compression mode 0 = fast, 1 = normal
+          default: 1 (normal)
+
+  -d{N}:  set dictionary size - [0, 30], default: 23 (8MB)
+          The maximum value for dictionary size is 1 GB = 2^30 bytes.
+          Dictionary size is calculated as DictionarySize = 2^N bytes. 
+          For decompressing file compressed by LZMA method with dictionary 
+          size D = 2^N you need about D bytes of memory (RAM).
+
+  -fb{N}: set number of fast bytes - [5, 273], default: 128
+          Usually big number gives a little bit better compression ratio 
+          and slower compression process.
+
+  -lc{N}: set number of literal context bits - [0, 8], default: 3
+          Sometimes lc=4 gives gain for big files.
+
+  -lp{N}: set number of literal pos bits - [0, 4], default: 0
+          lp switch is intended for periodical data when period is 
+          equal 2^N. For example, for 32-bit (4 bytes) 
+          periodical data you can use lp=2. Often it's better to set lc0, 
+          if you change lp switch.
+
+  -pb{N}: set number of pos bits - [0, 4], default: 2
+          pb switch is intended for periodical data 
+          when period is equal 2^N.
+
+  -mf{MF_ID}: set Match Finder. Default: bt4. 
+              Algorithms from the hc* group don't provide a good compression
+              ratio, but they often work pretty fast in combination with
+              fast mode (-a0).
+
+              Memory requirements depend on dictionary size
+              (parameter "d" in table below). 
+
+               MF_ID     Memory                   Description
+
+                bt2    d *  9.5 + 4MB  Binary Tree with 2 bytes hashing.
+                bt3    d * 11.5 + 4MB  Binary Tree with 3 bytes hashing.
+                bt4    d * 11.5 + 4MB  Binary Tree with 4 bytes hashing.
+                hc4    d *  7.5 + 4MB  Hash Chain with 4 bytes hashing.
+
+  -eos:   write End Of Stream marker. By default LZMA doesn't write 
+          eos marker, since LZMA decoder knows uncompressed size 
+          stored in .lzma file header.
+
+  -si:    Read data from stdin (it will write End Of Stream marker).
+  -so:    Write data to stdout
+
+
+Examples:
+
+1) LZMA e file.bin file.lzma -d16 -lc0 
+
+compresses file.bin to file.lzma with 64 KB dictionary (2^16=64K)  
+and 0 literal context bits. -lc0 allows to reduce memory requirements 
+for decompression.
+
+
+2) LZMA e file.bin file.lzma -lc0 -lp2
+
+compresses file.bin to file.lzma with settings suitable 
+for 32-bit periodical data (for example, ARM or MIPS code).
+
+3) LZMA d file.lzma file.bin
+
+decompresses file.lzma to file.bin.
+
+
+Compression ratio hints
+-----------------------
+
+Recommendations
+---------------
+
+To increase the compression ratio for LZMA compressing it's desirable 
+to have aligned data (if it's possible) and also it's desirable to locate
+data in such order, where code is grouped in one place and data is 
+grouped in other place (it's better than such mixing: code, data, code,
+data, ...).
+
+
+Filters
+-------
+You can increase the compression ratio for some data types, using
+special filters before compressing. For example, it's possible to 
+increase the compression ratio by 5-10% for code for these CPU ISAs: 
+x86, IA-64, ARM, ARM-Thumb, PowerPC, SPARC.
+
+You can find C source code of such filters in C/Bra*.* files
+
+You can check the compression ratio gain of these filters with such 
+7-Zip commands (example for ARM code):
+No filter:
+  7z a a1.7z a.bin -m0=lzma
+
+With filter for little-endian ARM code:
+  7z a a2.7z a.bin -m0=arm -m1=lzma        
+
+It works in such manner:
+Compressing    = Filter_encoding + LZMA_encoding
+Decompressing  = LZMA_decoding + Filter_decoding
+
+Compressing and decompressing speed of such filters is very high,
+so it will not increase decompressing time too much.
+Moreover, it reduces decompression time for LZMA_decoding, 
+since compression ratio with filtering is higher.
+
+These filters convert CALL (calling procedure) instructions 
+from relative offsets to absolute addresses, so such data becomes more 
+compressible.
+
+For some ISAs (for example, for MIPS) it's impossible to get gain from such filter.
+
+
+
+---
+
+http://www.7-zip.org
+http://www.7-zip.org/sdk.html
+http://www.7-zip.org/support.html
diff --git a/libraries/zlib/CMakeLists.txt b/libraries/zlib/CMakeLists.txt
new file mode 100644
index 000000000..a1d6637a9
--- /dev/null
+++ b/libraries/zlib/CMakeLists.txt
@@ -0,0 +1,197 @@
+cmake_minimum_required(VERSION 2.8.7)
+set(CMAKE_ALLOW_LOOSE_LOOP_CONSTRUCTS ON)
+
+make_release_only()  # not defined in this file; presumably provided by the parent project -- TODO confirm
+
+project(zlib C)
+
+set(VERSION "1.2.7")  # NOTE(review): not referenced again in this file; confirm it matches ZLIB_VERSION in zlib.h
+
+if(NOT DEFINED BUILD_SHARED_LIBS)
+    option(BUILD_SHARED_LIBS "Build a shared library form of zlib" OFF)
+endif()
+
+set(ZLIBNAME z)  # name of the library target created further down
+
+include(CheckTypeSize)
+include(CheckFunctionExists)
+include(CheckIncludeFile)
+include(CheckCSourceCompiles)
+enable_testing()
+
+check_include_file(sys/types.h HAVE_SYS_TYPES_H)
+check_include_file(stdint.h    HAVE_STDINT_H)
+check_include_file(stddef.h    HAVE_STDDEF_H)
+
+#
+# Large file support: if off64_t is found when _LARGEFILE64_SOURCE is defined,
+# add -D_LARGEFILE64_SOURCE=1 to the build.
+set(CMAKE_REQUIRED_DEFINITIONS -D_LARGEFILE64_SOURCE=1)
+# Pass the header-detection results on to the off64_t check below.
+# NOTE(review): this was added for CheckTypeSize.cmake in CMake 2.4.x, but this
+# file now requires CMake 2.8.7 -- confirm whether it is still needed.
+if(HAVE_SYS_TYPES_H)
+    list(APPEND CMAKE_REQUIRED_DEFINITIONS -DHAVE_SYS_TYPES_H)
+endif()
+if(HAVE_STDINT_H)
+    list(APPEND CMAKE_REQUIRED_DEFINITIONS -DHAVE_STDINT_H)
+endif()
+if(HAVE_STDDEF_H)
+    list(APPEND CMAKE_REQUIRED_DEFINITIONS -DHAVE_STDDEF_H)
+endif()
+check_type_size(off64_t OFF64_T)
+if(HAVE_OFF64_T)
+   add_definitions(-D_LARGEFILE64_SOURCE=1)
+endif()
+set(CMAKE_REQUIRED_DEFINITIONS) # reset so the checks below do not inherit these definitions
+
+#
+# Check for fseeko; define NO_FSEEKO when it is not available
+#
+check_function_exists(fseeko HAVE_FSEEKO)
+if(NOT HAVE_FSEEKO)
+    add_definitions(-DNO_FSEEKO)
+endif()
+
+#
+# Check for unistd.h (the result, Z_HAVE_UNISTD_H, is not used again in this file)
+#
+check_include_file(unistd.h Z_HAVE_UNISTD_H)
+
+if(MSVC)
+    set(CMAKE_DEBUG_POSTFIX "d")  # debug outputs get a "d" suffix
+    add_definitions(-D_CRT_SECURE_NO_DEPRECATE)  # presumably silences MSVC CRT deprecation warnings
+    add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE)
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+endif()
+
+#if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
+#    # If we're doing an out of source build and the user has a zconf.h
+#    # in their source tree...
+#    if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/zconf.h)
+#        message(FATAL_ERROR
+#            "You must remove ${CMAKE_CURRENT_SOURCE_DIR}/zconf.h "
+#            "from the source tree.  This file is included with zlib "
+#            "but CMake generates this file for you automatically "
+#            "in the build directory.")
+#  endif()
+#endif()
+#
+#configure_file(${CMAKE_CURRENT_SOURCE_DIR}/zconf.h.cmakein
+#               ${CMAKE_CURRENT_BINARY_DIR}/zconf.h @ONLY)
+#include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
+
+#============================================================================
+# zlib: header and source file lists used by the library target
+#============================================================================
+
+set(ZLIB_PUBLIC_HDRS  # headers installed by the install(FILES ...) rule
+    zconf.h
+    zlib.h
+)
+set(ZLIB_PRIVATE_HDRS  # listed on the target but not installed
+    crc32.h
+    deflate.h
+    gzguts.h
+    inffast.h
+    inffixed.h
+    inflate.h
+    inftrees.h
+    trees.h
+    zutil.h
+)
+set(ZLIB_SRCS  # the gz* sources and the Windows resource file are commented out
+    adler32.c
+    compress.c
+    crc32.c
+    deflate.c
+#   gzclose.c
+#   gzlib.c
+#   gzread.c
+#   gzwrite.c
+    inflate.c
+    infback.c
+    inftrees.c
+    inffast.c
+    trees.c
+    uncompr.c
+    zutil.c
+#   win32/zlib1.rc
+)
+
+# Extract the quoted value of the ZLIB_VERSION define in zlib.h into ZLIB_FULL_VERSION
+file(READ ${CMAKE_CURRENT_SOURCE_DIR}/zlib.h _zlib_h_contents)
+string(REGEX REPLACE ".*#define[ \t]+ZLIB_VERSION[ \t]+\"([0-9A-Za-z.]+)\".*"
+    "\\1" ZLIB_FULL_VERSION ${_zlib_h_contents})
+
+if(MINGW)
+    # Compile win32/zlib1.rc with windres so MinGW builds get DLL resource information.
+    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/zlib1rc.obj
+                       COMMAND windres.exe
+                            -D GCC_WINDRES
+                            -I ${CMAKE_CURRENT_SOURCE_DIR}
+                            -I ${CMAKE_CURRENT_BINARY_DIR}
+                            -o ${CMAKE_CURRENT_BINARY_DIR}/zlib1rc.obj
+                            -i ${CMAKE_CURRENT_SOURCE_DIR}/win32/zlib1.rc)
+    set(ZLIB_SRCS ${ZLIB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/zlib1rc.obj)  # add the compiled resource to the source list
+endif()
+
+add_library(${ZLIBNAME} STATIC ${ZLIB_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS})  # always STATIC, whatever BUILD_SHARED_LIBS is
+set_target_properties(${ZLIBNAME} PROPERTIES DEFINE_SYMBOL ZLIB_DLL)  # NOTE(review): shared-library property; likely no effect on a STATIC target -- confirm
+
+set_target_properties(${ZLIBNAME} PROPERTIES SOVERSION 1)
+
+if(NOT CYGWIN)
+    # This property causes shared libraries on Linux to have the full version
+    # encoded into their final filename.  We disable this on Cygwin because
+    # it causes cygz-${ZLIB_FULL_VERSION}.dll to be created when cygz.dll
+    # seems to be the default.
+    #
+    # This has no effect with MSVC, on that platform the version info for
+    # the DLL comes from the resource file win32/zlib1.rc
+    set_target_properties(${ZLIBNAME} PROPERTIES VERSION ${ZLIB_FULL_VERSION})
+endif()
+
+if(BUILD_SHARED_LIBS AND WIN32)
+    # Meant to produce zlib1.dll for shared builds. NOTE(review): the target above is STATIC -- confirm this branch is still wanted
+    set_target_properties(${ZLIBNAME} PROPERTIES SUFFIX "1.dll")
+else()
+    # On unix-like platforms the library is almost always called libz
+    set_target_properties(${ZLIBNAME} PROPERTIES OUTPUT_NAME z)
+endif()
+
+if(NOT SKIP_INSTALL_LIBRARIES AND NOT SKIP_INSTALL_ALL )  # install the library unless either skip variable is set
+    install(TARGETS ${ZLIBNAME}
+        RUNTIME DESTINATION bin
+        ARCHIVE DESTINATION lib
+        LIBRARY DESTINATION lib )
+endif()
+if(NOT SKIP_INSTALL_HEADERS AND NOT SKIP_INSTALL_ALL )  # install only the public headers (zconf.h, zlib.h)
+    install(FILES ${ZLIB_PUBLIC_HDRS} DESTINATION include)
+endif()
+if(NOT SKIP_INSTALL_FILES AND NOT SKIP_INSTALL_ALL )  # install the zlib.3 man page
+    install(FILES zlib.3 DESTINATION share/man/man3)
+endif()
+
+#============================================================================
+# Example binaries
+#============================================================================
+
+#add_executable(example example.c)
+#target_link_libraries(example ${ZLIBNAME})
+#add_test(example example)
+
+#add_executable(minigzip minigzip.c)
+#target_link_libraries(minigzip ${ZLIBNAME})
+
+#if(HAVE_OFF64_T)
+#    add_executable(example64 example.c)
+#    target_link_libraries(example64 ${ZLIBNAME})
+#    set_target_properties(example64 PROPERTIES COMPILE_FLAGS "-D_FILE_OFFSET_BITS=64")
+#    add_test(example64 example64)
+#
+#    add_executable(minigzip64 minigzip.c)
+#    target_link_libraries(minigzip64 ${ZLIBNAME})
+#    set_target_properties(minigzip64 PROPERTIES COMPILE_FLAGS "-D_FILE_OFFSET_BITS=64")
+#endif()
diff --git a/libraries/zlib/ChangeLog b/libraries/zlib/ChangeLog
new file mode 100644
index 000000000..30199a65a
--- /dev/null
+++ b/libraries/zlib/ChangeLog
@@ -0,0 +1,1515 @@
+
+                ChangeLog file for zlib
+
+Changes in 1.2.11 (15 Jan 2017)
+- Fix deflate stored bug when pulling last block from window
+- Permit immediate deflateParams changes before any deflate input
+
+Changes in 1.2.10 (2 Jan 2017)
+- Avoid warnings on snprintf() return value
+- Fix bug in deflate_stored() for zero-length input
+- Fix bug in gzwrite.c that produced corrupt gzip files
+- Remove files to be installed before copying them in Makefile.in
+- Add warnings when compiling with assembler code
+
+Changes in 1.2.9 (31 Dec 2016)
+- Fix contrib/minizip to permit unzipping with desktop API [Zouzou]
+- Improve contrib/blast to return unused bytes
+- Assure that gzoffset() is correct when appending
+- Improve compress() and uncompress() to support large lengths
+- Fix bug in test/example.c where error code not saved
+- Remedy Coverity warning [Randers-Pehrson]
+- Improve speed of gzprintf() in transparent mode
+- Fix inflateInit2() bug when windowBits is 16 or 32
+- Change DEBUG macro to ZLIB_DEBUG
+- Avoid uninitialized access by gzclose_w()
+- Allow building zlib outside of the source directory
+- Fix bug that accepted invalid zlib header when windowBits is zero
+- Fix gzseek() problem on MinGW due to buggy _lseeki64 there
+- Loop on write() calls in gzwrite.c in case of non-blocking I/O
+- Add --warn (-w) option to ./configure for more compiler warnings
+- Reject a window size of 256 bytes if not using the zlib wrapper
+- Fix bug when level 0 used with Z_HUFFMAN or Z_RLE
+- Add --debug (-d) option to ./configure to define ZLIB_DEBUG
+- Fix bugs in creating a very large gzip header
+- Add uncompress2() function, which returns the input size used
+- Assure that deflateParams() will not switch functions mid-block
+- Dramatically speed up deflation for level 0 (storing)
+- Add gzfread(), duplicating the interface of fread()
+- Add gzfwrite(), duplicating the interface of fwrite()
+- Add deflateGetDictionary() function
+- Use snprintf() for later versions of Microsoft C
+- Fix *Init macros to use z_ prefix when requested
+- Replace as400 with os400 for OS/400 support [Monnerat]
+- Add crc32_z() and adler32_z() functions with size_t lengths
+- Update Visual Studio project files [AraHaan]
+
+Changes in 1.2.8 (28 Apr 2013)
+- Update contrib/minizip/iowin32.c for Windows RT [Vollant]
+- Do not force Z_CONST for C++
+- Clean up contrib/vstudio [Roß]
+- Correct spelling error in zlib.h
+- Fix mixed line endings in contrib/vstudio
+
+Changes in 1.2.7.3 (13 Apr 2013)
+- Fix version numbers and DLL names in contrib/vstudio/*/zlib.rc
+
+Changes in 1.2.7.2 (13 Apr 2013)
+- Change check for a four-byte type back to hexadecimal
+- Fix typo in win32/Makefile.msc
+- Add casts in gzwrite.c for pointer differences
+
+Changes in 1.2.7.1 (24 Mar 2013)
+- Replace use of unsafe string functions with snprintf if available
+- Avoid including stddef.h on Windows for Z_SOLO compile [Niessink]
+- Fix gzgetc undefine when Z_PREFIX set [Turk]
+- Eliminate use of mktemp in Makefile (not always available)
+- Fix bug in 'F' mode for gzopen()
+- Add inflateGetDictionary() function
+- Correct comment in deflate.h
+- Use _snprintf for snprintf in Microsoft C
+- On Darwin, only use /usr/bin/libtool if libtool is not Apple
+- Delete "--version" file if created by "ar --version" [Richard G.]
+- Fix configure check for veracity of compiler error return codes
+- Fix CMake compilation of static lib for MSVC2010 x64
+- Remove unused variable in infback9.c
+- Fix argument checks in gzlog_compress() and gzlog_write()
+- Clean up the usage of z_const and respect const usage within zlib
+- Clean up examples/gzlog.[ch] comparisons of different types
+- Avoid shift equal to bits in type (caused endless loop)
+- Fix uninitialized value bug in gzputc() introduced by const patches
+- Fix memory allocation error in examples/zran.c [Nor]
+- Fix bug where gzopen(), gzclose() would write an empty file
+- Fix bug in gzclose() when gzwrite() runs out of memory
+- Check for input buffer malloc failure in examples/gzappend.c
+- Add note to contrib/blast to use binary mode in stdio
+- Fix comparisons of differently signed integers in contrib/blast
+- Check for invalid code length codes in contrib/puff
+- Fix serious but very rare decompression bug in inftrees.c
+- Update inflateBack() comments, since inflate() can be faster
+- Use underscored I/O function names for WINAPI_FAMILY
+- Add _tr_flush_bits to the external symbols prefixed by --zprefix
+- Add contrib/vstudio/vc10 pre-build step for static only
+- Quote --version-script argument in CMakeLists.txt
+- Don't specify --version-script on Apple platforms in CMakeLists.txt
+- Fix casting error in contrib/testzlib/testzlib.c
+- Fix types in contrib/minizip to match result of get_crc_table()
+- Simplify contrib/vstudio/vc10 with 'd' suffix
+- Add TOP support to win32/Makefile.msc
+- Support i686 and amd64 assembler builds in CMakeLists.txt
+- Fix typos in the use of _LARGEFILE64_SOURCE in zconf.h
+- Add vc11 and vc12 build files to contrib/vstudio
+- Add gzvprintf() as an undocumented function in zlib
+- Fix configure for Sun shell
+- Remove runtime check in configure for four-byte integer type
+- Add casts and consts to ease user conversion to C++
+- Add man pages for minizip and miniunzip
+- In Makefile uninstall, don't rm if preceding cd fails
+- Do not return Z_BUF_ERROR if deflateParam() has nothing to write
+
+Changes in 1.2.7 (2 May 2012)
+- Replace use of memmove() with a simple copy for portability
+- Test for existence of strerror
+- Restore gzgetc_ for backward compatibility with 1.2.6
+- Fix build with non-GNU make on Solaris
+- Require gcc 4.0 or later on Mac OS X to use the hidden attribute
+- Include unistd.h for Watcom C
+- Use __WATCOMC__ instead of __WATCOM__
+- Do not use the visibility attribute if NO_VIZ defined
+- Improve the detection of no hidden visibility attribute
+- Avoid using __int64 for gcc or solo compilation
+- Cast to char * in gzprintf to avoid warnings [Zinser]
+- Fix make_vms.com for VAX [Zinser]
+- Don't use library or built-in byte swaps
+- Simplify test and use of gcc hidden attribute
+- Fix bug in gzclose_w() when gzwrite() fails to allocate memory
+- Add "x" (O_EXCL) and "e" (O_CLOEXEC) modes support to gzopen()
+- Fix bug in test/minigzip.c for configure --solo
+- Fix contrib/vstudio project link errors [Mohanathas]
+- Add ability to choose the builder in make_vms.com [Schweda]
+- Add DESTDIR support to mingw32 win32/Makefile.gcc
+- Fix comments in win32/Makefile.gcc for proper usage
+- Allow overriding the default install locations for cmake
+- Generate and install the pkg-config file with cmake
+- Build both a static and a shared version of zlib with cmake
+- Include version symbols for cmake builds
+- If using cmake with MSVC, add the source directory to the includes
+- Remove unneeded EXTRA_CFLAGS from win32/Makefile.gcc [Truta]
+- Move obsolete emx makefile to old [Truta]
+- Allow the use of -Wundef when compiling or using zlib
+- Avoid the use of the -u option with mktemp
+- Improve inflate() documentation on the use of Z_FINISH
+- Recognize clang as gcc
+- Add gzopen_w() in Windows for wide character path names
+- Rename zconf.h in CMakeLists.txt to move it out of the way
+- Add source directory in CMakeLists.txt for building examples
+- Look in build directory for zlib.pc in CMakeLists.txt
+- Remove gzflags from zlibvc.def in vc9 and vc10
+- Fix contrib/minizip compilation in the MinGW environment
+- Update ./configure for Solaris, support --64 [Mooney]
+- Remove -R. from Solaris shared build (possible security issue)
+- Avoid race condition for parallel make (-j) running example
+- Fix type mismatch between get_crc_table() and crc_table
+- Fix parsing of version with "-" in CMakeLists.txt [Snider, Ziegler]
+- Fix the path to zlib.map in CMakeLists.txt
+- Force the native libtool in Mac OS X to avoid GNU libtool [Beebe]
+- Add instructions to win32/Makefile.gcc for shared install [Torri]
+
+Changes in 1.2.6.1 (12 Feb 2012)
+- Avoid the use of the Objective-C reserved name "id"
+- Include io.h in gzguts.h for Microsoft compilers
+- Fix problem with ./configure --prefix and gzgetc macro
+- Include gz_header definition when compiling zlib solo
+- Put gzflags() functionality back in zutil.c
+- Avoid library header include in crc32.c for Z_SOLO
+- Use name in GCC_CLASSIC as C compiler for coverage testing, if set
+- Minor cleanup in contrib/minizip/zip.c [Vollant]
+- Update make_vms.com [Zinser]
+- Remove unnecessary gzgetc_ function
+- Use optimized byte swap operations for Microsoft and GNU [Snyder]
+- Fix minor typo in zlib.h comments [Rzesniowiecki]
+
+Changes in 1.2.6 (29 Jan 2012)
+- Update the Pascal interface in contrib/pascal
+- Fix function numbers for gzgetc_ in zlibvc.def files
+- Fix configure.ac for contrib/minizip [Schiffer]
+- Fix large-entry detection in minizip on 64-bit systems [Schiffer]
+- Have ./configure use the compiler return code for error indication
+- Fix CMakeLists.txt for cross compilation [McClure]
+- Fix contrib/minizip/zip.c for 64-bit architectures [Dalsnes]
+- Fix compilation of contrib/minizip on FreeBSD [Marquez]
+- Correct suggested usages in win32/Makefile.msc [Shachar, Horvath]
+- Include io.h for Turbo C / Borland C on all platforms [Truta]
+- Make version explicit in contrib/minizip/configure.ac [Bosmans]
+- Avoid warning for no encryption in contrib/minizip/zip.c [Vollant]
+- Minor cleanup of contrib/minizip/unzip.c [Vollant]
+- Fix bug when compiling minizip with C++ [Vollant]
+- Protect for long name and extra fields in contrib/minizip [Vollant]
+- Avoid some warnings in contrib/minizip [Vollant]
+- Add -I../.. -L../.. to CFLAGS for minizip and miniunzip
+- Add missing libs to minizip linker command
+- Add support for VPATH builds in contrib/minizip
+- Add an --enable-demos option to contrib/minizip/configure
+- Add the generation of configure.log by ./configure
+- Exit when required parameters not provided to win32/Makefile.gcc
+- Have gzputc return the character written instead of the argument
+- Use the -m option on ldconfig for BSD systems [Tobias]
+- Correct in zlib.map when deflateResetKeep was added
+
+Changes in 1.2.5.3 (15 Jan 2012)
+- Restore gzgetc function for binary compatibility
+- Do not use _lseeki64 under Borland C++ [Truta]
+- Update win32/Makefile.msc to build test/*.c [Truta]
+- Remove old/visualc6 given CMakefile and other alternatives
+- Update AS400 build files and documentation [Monnerat]
+- Update win32/Makefile.gcc to build test/*.c [Truta]
+- Permit stronger flushes after Z_BLOCK flushes
+- Avoid extraneous empty blocks when doing empty flushes
+- Permit Z_NULL arguments to deflatePending
+- Allow deflatePrime() to insert bits in the middle of a stream
+- Remove second empty static block for Z_PARTIAL_FLUSH
+- Write out all of the available bits when using Z_BLOCK
+- Insert the first two strings in the hash table after a flush
+
+Changes in 1.2.5.2 (17 Dec 2011)
+- fix ld error: unable to find version dependency 'ZLIB_1.2.5'
+- use relative symlinks for shared libs
+- Avoid searching past window for Z_RLE strategy
+- Assure that high-water mark initialization is always applied in deflate
+- Add assertions to fill_window() in deflate.c to match comments
+- Update python link in README
+- Correct spelling error in gzread.c
+- Fix bug in gzgets() for a concatenated empty gzip stream
+- Correct error in comment for gz_make()
+- Change gzread() and related to ignore junk after gzip streams
+- Allow gzread() and related to continue after gzclearerr()
+- Allow gzrewind() and gzseek() after a premature end-of-file
+- Simplify gzseek() now that raw after gzip is ignored
+- Change gzgetc() to a macro for speed (~40% speedup in testing)
+- Fix gzclose() to return the actual error last encountered
+- Always add large file support for windows
+- Include zconf.h for windows large file support
+- Include zconf.h.cmakein for windows large file support
+- Update zconf.h.cmakein on make distclean
+- Merge vestigial vsnprintf determination from zutil.h to gzguts.h
+- Clarify how gzopen() appends in zlib.h comments
+- Correct documentation of gzdirect() since junk at end now ignored
+- Add a transparent write mode to gzopen() when 'T' is in the mode
+- Update python link in zlib man page
+- Get inffixed.h and MAKEFIXED result to match
+- Add a ./config --solo option to make zlib subset with no library use
+- Add undocumented inflateResetKeep() function for CAB file decoding
+- Add --cover option to ./configure for gcc coverage testing
+- Add #define ZLIB_CONST option to use const in the z_stream interface
+- Add comment to gzdopen() in zlib.h to use dup() when using fileno()
+- Note behavior of uncompress() to provide as much data as it can
+- Add files in contrib/minizip to aid in building libminizip
+- Split off AR options in Makefile.in and configure
+- Change ON macro to Z_ARG to avoid application conflicts
+- Facilitate compilation with Borland C++ for pragmas and vsnprintf
+- Include io.h for Turbo C / Borland C++
+- Move example.c and minigzip.c to test/
+- Simplify incomplete code table filling in inflate_table()
+- Remove code from inflate.c and infback.c that is impossible to execute
+- Test the inflate code with full coverage
+- Allow deflateSetDictionary, inflateSetDictionary at any time (in raw)
+- Add deflateResetKeep and fix inflateResetKeep to retain dictionary
+- Fix gzwrite.c to accommodate reduced memory zlib compilation
+- Have inflate() with Z_FINISH avoid the allocation of a window
+- Do not set strm->adler when doing raw inflate
+- Fix gzeof() to behave just like feof() when read is not past end of file
+- Fix bug in gzread.c when end-of-file is reached
+- Avoid use of Z_BUF_ERROR in gz* functions except for premature EOF
+- Document gzread() capability to read concurrently written files
+- Remove hard-coding of resource compiler in CMakeLists.txt [Blammo]
+
+Changes in 1.2.5.1 (10 Sep 2011)
+- Update FAQ entry on shared builds (#13)
+- Avoid symbolic argument to chmod in Makefile.in
+- Fix bug and add consts in contrib/puff [Oberhumer]
+- Update contrib/puff/zeros.raw test file to have all block types
+- Add full coverage test for puff in contrib/puff/Makefile
+- Fix static-only-build install in Makefile.in
+- Fix bug in unzGetCurrentFileInfo() in contrib/minizip [Kuno]
+- Add libz.a dependency to shared in Makefile.in for parallel builds
+- Spell out "number" (instead of "nb") in zlib.h for total_in, total_out
+- Replace $(...) with `...` in configure for non-bash sh [Bowler]
+- Add darwin* to Darwin* and solaris* to SunOS\ 5* in configure [Groffen]
+- Add solaris* to Linux* in configure to allow gcc use [Groffen]
+- Add *bsd* to Linux* case in configure [Bar-Lev]
+- Add inffast.obj to dependencies in win32/Makefile.msc
+- Correct spelling error in deflate.h [Kohler]
+- Change libzdll.a again to libz.dll.a (!) in win32/Makefile.gcc
+- Add test to configure for GNU C looking for gcc in output of $cc -v
+- Add zlib.pc generation to win32/Makefile.gcc [Weigelt]
+- Fix bug in zlib.h for _FILE_OFFSET_BITS set and _LARGEFILE64_SOURCE not
+- Add comment in zlib.h that adler32_combine with len2 < 0 makes no sense
+- Make NO_DIVIDE option in adler32.c much faster (thanks to John Reiser)
+- Make stronger test in zconf.h to include unistd.h for LFS
+- Apply Darwin patches for 64-bit file offsets to contrib/minizip [Slack]
+- Fix zlib.h LFS support when Z_PREFIX used
+- Add updated as400 support (removed from old) [Monnerat]
+- Avoid deflate sensitivity to volatile input data
+- Avoid division in adler32_combine for NO_DIVIDE
+- Clarify the use of Z_FINISH with deflateBound() amount of space
+- Set binary for output file in puff.c
+- Use u4 type for crc_table to avoid conversion warnings
+- Apply casts in zlib.h to avoid conversion warnings
+- Add OF to prototypes for adler32_combine_ and crc32_combine_ [Miller]
+- Improve inflateSync() documentation to note indeterminacy
+- Add deflatePending() function to return the amount of pending output
+- Correct the spelling of "specification" in FAQ [Randers-Pehrson]
+- Add a check in configure for stdarg.h, use for gzprintf()
+- Check that pointers fit in ints when gzprintf() compiled old style
+- Add dummy name before $(SHAREDLIBV) in Makefile [Bar-Lev, Bowler]
+- Delete line in configure that adds -L. libz.a to LDFLAGS [Weigelt]
+- Add debug records in assembler code [Londer]
+- Update RFC references to use http://tools.ietf.org/html/... [Li]
+- Add --archs option, use of libtool to configure for Mac OS X [Borstel]
+
+Changes in 1.2.5 (19 Apr 2010)
+- Disable visibility attribute in win32/Makefile.gcc [Bar-Lev]
+- Default to libdir as sharedlibdir in configure [Nieder]
+- Update copyright dates on modified source files
+- Update trees.c to be able to generate modified trees.h
+- Exit configure for MinGW, suggesting win32/Makefile.gcc
+- Check for NULL path in gz_open [Homurlu]
+
+Changes in 1.2.4.5 (18 Apr 2010)
+- Set sharedlibdir in configure [Torok]
+- Set LDFLAGS in Makefile.in [Bar-Lev]
+- Avoid mkdir objs race condition in Makefile.in [Bowler]
+- Add ZLIB_INTERNAL in front of internal inter-module functions and arrays
+- Define ZLIB_INTERNAL to hide internal functions and arrays for GNU C
+- Don't use hidden attribute when it is a warning generator (e.g. Solaris)
+
+Changes in 1.2.4.4 (18 Apr 2010)
+- Fix CROSS_PREFIX executable testing, CHOST extract, mingw* [Torok]
+- Undefine _LARGEFILE64_SOURCE in zconf.h if it is zero, but not if empty
+- Try to use bash or ksh regardless of functionality of /bin/sh
+- Fix configure incompatibility with NetBSD sh
+- Remove attempt to run under bash or ksh since have better NetBSD fix
+- Fix win32/Makefile.gcc for MinGW [Bar-Lev]
+- Add diagnostic messages when using CROSS_PREFIX in configure
+- Added --sharedlibdir option to configure [Weigelt]
+- Use hidden visibility attribute when available [Frysinger]
+
+Changes in 1.2.4.3 (10 Apr 2010)
+- Only use CROSS_PREFIX in configure for ar and ranlib if they exist
+- Use CROSS_PREFIX for nm [Bar-Lev]
+- Assume _LARGEFILE64_SOURCE defined is equivalent to true
+- Avoid use of undefined symbols in #if with && and ||
+- Make *64 prototypes in gzguts.h consistent with functions
+- Add -shared load option for MinGW in configure [Bowler]
+- Move z_off64_t to public interface, use instead of off64_t
+- Remove ! from shell test in configure (not portable to Solaris)
+- Change +0 macro tests to -0 for possibly increased portability
+
+Changes in 1.2.4.2 (9 Apr 2010)
+- Add consistent carriage returns to readme.txt's in masmx86 and masmx64
+- Really provide prototypes for *64 functions when building without LFS
+- Only define unlink() in minigzip.c if unistd.h not included
+- Update README to point to contrib/vstudio project files
+- Move projects/vc6 to old/ and remove projects/
+- Include stdlib.h in minigzip.c for setmode() definition under WinCE
+- Clean up assembler builds in win32/Makefile.msc [Rowe]
+- Include sys/types.h for Microsoft for off_t definition
+- Fix memory leak on error in gz_open()
+- Symbolize nm as $NM in configure [Weigelt]
+- Use TEST_LDSHARED instead of LDSHARED to link test programs [Weigelt]
+- Add +0 to _FILE_OFFSET_BITS and _LFS64_LARGEFILE in case not defined
+- Fix bug in gzeof() to take into account unused input data
+- Avoid initialization of structures with variables in puff.c
+- Updated win32/README-WIN32.txt [Rowe]
+
+Changes in 1.2.4.1 (28 Mar 2010)
+- Remove the use of [a-z] constructs for sed in configure [gentoo 310225]
+- Remove $(SHAREDLIB) from LIBS in Makefile.in [Creech]
+- Restore "for debugging" comment on sprintf() in gzlib.c
+- Remove fdopen for MVS from gzguts.h
+- Put new README-WIN32.txt in win32 [Rowe]
+- Add check for shell to configure and invoke another shell if needed
+- Fix big fat stinking bug in gzseek() on uncompressed files
+- Remove vestigial F_OPEN64 define in zutil.h
+- Set and check the value of _LARGEFILE_SOURCE and _LARGEFILE64_SOURCE
+- Avoid errors on non-LFS systems when applications define LFS macros
+- Set EXE to ".exe" in configure for MINGW [Kahle]
+- Match crc32() in crc32.c exactly to the prototype in zlib.h [Sherrill]
+- Add prefix for cross-compilation in win32/makefile.gcc [Bar-Lev]
+- Add DLL install in win32/makefile.gcc [Bar-Lev]
+- Allow Linux* or linux* from uname in configure [Bar-Lev]
+- Allow ldconfig to be redefined in configure and Makefile.in [Bar-Lev]
+- Add cross-compilation prefixes to configure [Bar-Lev]
+- Match type exactly in gz_load() invocation in gzread.c
+- Match type exactly of zcalloc() in zutil.c to zlib.h alloc_func
+- Provide prototypes for *64 functions when building zlib without LFS
+- Don't use -lc when linking shared library on MinGW
+- Remove errno.h check in configure and vestigial errno code in zutil.h
+
+Changes in 1.2.4 (14 Mar 2010)
+- Fix VER3 extraction in configure for no fourth subversion
+- Update zlib.3, add docs to Makefile.in to make .pdf out of it
+- Add zlib.3.pdf to distribution
+- Don't set error code in gzerror() if passed pointer is NULL
+- Apply destination directory fixes to CMakeLists.txt [Lowman]
+- Move #cmakedefine's to a new zconf.in.cmakein
+- Restore zconf.h for builds that don't use configure or cmake
+- Add distclean to dummy Makefile for convenience
+- Update and improve INDEX, README, and FAQ
+- Update CMakeLists.txt for the return of zconf.h [Lowman]
+- Update contrib/vstudio/vc9 and vc10 [Vollant]
+- Change libz.dll.a back to libzdll.a in win32/Makefile.gcc
+- Apply license and readme changes to contrib/asm686 [Raiter]
+- Check file name lengths and add -c option in minigzip.c [Li]
+- Update contrib/amd64 and contrib/masmx86/ [Vollant]
+- Avoid use of "eof" parameter in trees.c to not shadow library variable
+- Update make_vms.com for removal of zlibdefs.h [Zinser]
+- Update assembler code and vstudio projects in contrib [Vollant]
+- Remove outdated assembler code contrib/masm686 and contrib/asm586
+- Remove old vc7 and vc8 from contrib/vstudio
+- Update win32/Makefile.msc, add ZLIB_VER_SUBREVISION [Rowe]
+- Fix memory leaks in gzclose_r() and gzclose_w(), file leak in gz_open()
+- Add contrib/gcc_gvmat64 for longest_match and inflate_fast [Vollant]
+- Remove *64 functions from win32/zlib.def (they're not 64-bit yet)
+- Fix bug in void-returning vsprintf() case in gzwrite.c
+- Fix name change from inflate.h in contrib/inflate86/inffas86.c
+- Check if temporary file exists before removing in make_vms.com [Zinser]
+- Fix make install and uninstall for --static option
+- Fix usage of _MSC_VER in gzguts.h and zutil.h [Truta]
+- Update readme.txt in contrib/masmx64 and masmx86 to assemble
+
+Changes in 1.2.3.9 (21 Feb 2010)
+- Expunge gzio.c
+- Move as400 build information to old
+- Fix updates in contrib/minizip and contrib/vstudio
+- Add const to vsnprintf test in configure to avoid warnings [Weigelt]
+- Delete zconf.h (made by configure) [Weigelt]
+- Change zconf.in.h to zconf.h.in per convention [Weigelt]
+- Check for NULL buf in gzgets()
+- Return empty string for gzgets() with len == 1 (like fgets())
+- Fix description of gzgets() in zlib.h for end-of-file, NULL return
+- Update minizip to 1.1 [Vollant]
+- Avoid MSVC loss of data warnings in gzread.c, gzwrite.c
+- Note in zlib.h that gzerror() should be used to distinguish from EOF
+- Remove use of snprintf() from gzlib.c
+- Fix bug in gzseek()
+- Update contrib/vstudio, adding vc9 and vc10 [Kuno, Vollant]
+- Fix zconf.h generation in CMakeLists.txt [Lowman]
+- Improve comments in zconf.h where modified by configure
+
+Changes in 1.2.3.8 (13 Feb 2010)
+- Clean up text files (tabs, trailing whitespace, etc.) [Oberhumer]
+- Use z_off64_t in gz_zero() and gz_skip() to match state->skip
+- Avoid comparison problem when sizeof(int) == sizeof(z_off64_t)
+- Revert to Makefile.in from 1.2.3.6 (live with the clutter)
+- Fix missing error return in gzflush(), add zlib.h note
+- Add *64 functions to zlib.map [Levin]
+- Fix signed/unsigned comparison in gz_comp()
+- Use SFLAGS when testing shared linking in configure
+- Add --64 option to ./configure to use -m64 with gcc
+- Fix ./configure --help to correctly name options
+- Have make fail if a test fails [Levin]
+- Avoid buffer overrun in contrib/masmx64/gvmat64.asm [Simpson]
+- Remove assembler object files from contrib
+
+Changes in 1.2.3.7 (24 Jan 2010)
+- Always gzopen() with O_LARGEFILE if available
+- Fix gzdirect() to work immediately after gzopen() or gzdopen()
+- Make gzdirect() more precise when the state changes while reading
+- Improve zlib.h documentation in many places
+- Catch memory allocation failure in gz_open()
+- Complete close operation if seek forward in gzclose_w() fails
+- Return Z_ERRNO from gzclose_r() if close() fails
+- Return Z_STREAM_ERROR instead of EOF for gzclose() being passed NULL
+- Return zero for gzwrite() errors to match zlib.h description
+- Return -1 on gzputs() error to match zlib.h description
+- Add zconf.in.h to allow recovery from configure modification [Weigelt]
+- Fix static library permissions in Makefile.in [Weigelt]
+- Avoid warnings in configure tests that hide functionality [Weigelt]
+- Add *BSD and DragonFly to Linux case in configure [gentoo 123571]
+- Change libzdll.a to libz.dll.a in win32/Makefile.gcc [gentoo 288212]
+- Avoid access of uninitialized data for first inflateReset2 call [Gomes]
+- Keep object files in subdirectories to reduce the clutter somewhat
+- Remove default Makefile and zlibdefs.h, add dummy Makefile
+- Add new external functions to Z_PREFIX, remove duplicates, z_z_ -> z_
+- Remove zlibdefs.h completely -- modify zconf.h instead
+
+Changes in 1.2.3.6 (17 Jan 2010)
+- Avoid void * arithmetic in gzread.c and gzwrite.c
+- Make compilers happier with const char * for gz_error message
+- Avoid unused parameter warning in inflate.c
+- Avoid signed-unsigned comparison warning in inflate.c
+- Indent #pragma's for traditional C
+- Fix usage of strwinerror() in gzlib.c, change to gz_strwinerror()
+- Correct email address in configure for system options
+- Update make_vms.com and add make_vms.com to contrib/minizip [Zinser]
+- Update zlib.map [Brown]
+- Fix Makefile.in for Solaris 10 make of example64 and minizip64 [Torok]
+- Apply various fixes to CMakeLists.txt [Lowman]
+- Add checks on len in gzread() and gzwrite()
+- Add error message for no more room for gzungetc()
+- Remove zlib version check in gzwrite()
+- Defer compression of gzprintf() result until need to
+- Use snprintf() in gzdopen() if available
+- Remove USE_MMAP configuration determination (only used by minigzip)
+- Remove examples/pigz.c (available separately)
+- Update examples/gun.c to 1.6
+
+Changes in 1.2.3.5 (8 Jan 2010)
+- Add space after #if in zutil.h for some compilers
+- Fix relatively harmless bug in deflate_fast() [Exarevsky]
+- Fix same problem in deflate_slow()
+- Add $(SHAREDLIBV) to LIBS in Makefile.in [Brown]
+- Add deflate_rle() for faster Z_RLE strategy run-length encoding
+- Add deflate_huff() for faster Z_HUFFMAN_ONLY encoding
+- Change name of "write" variable in inffast.c to avoid library collisions
+- Fix premature EOF from gzread() in gzio.c [Brown]
+- Use zlib header window size if windowBits is 0 in inflateInit2()
+- Remove compressBound() call in deflate.c to avoid linking compress.o
+- Replace use of errno in gz* with functions, support WinCE [Alves]
+- Provide alternative to perror() in minigzip.c for WinCE [Alves]
+- Don't use _vsnprintf on later versions of MSVC [Lowman]
+- Add CMake build script and input file [Lowman]
+- Update contrib/minizip to 1.1 [Svensson, Vollant]
+- Moved nintendods directory from contrib to .
+- Replace gzio.c with a new set of routines with the same functionality
+- Add gzbuffer(), gzoffset(), gzclose_r(), gzclose_w() as part of above
+- Update contrib/minizip to 1.1b
+- Change gzeof() to return 0 on error instead of -1 to agree with zlib.h
+
+Changes in 1.2.3.4 (21 Dec 2009)
+- Use old school .SUFFIXES in Makefile.in for FreeBSD compatibility
+- Update comments in configure and Makefile.in for default --shared
+- Fix test -z's in configure [Marquess]
+- Build examplesh and minigzipsh when not testing
+- Change NULL's to Z_NULL's in deflate.c and in comments in zlib.h
+- Import LDFLAGS from the environment in configure
+- Fix configure to populate SFLAGS with discovered CFLAGS options
+- Adapt make_vms.com to the new Makefile.in [Zinser]
+- Add zlib2ansi script for C++ compilation [Marquess]
+- Add _FILE_OFFSET_BITS=64 test to make test (when applicable)
+- Add AMD64 assembler code for longest match to contrib [Teterin]
+- Include options from $SFLAGS when doing $LDSHARED
+- Simplify 64-bit file support by introducing z_off64_t type
+- Make shared object files in objs directory to work around old Sun cc
+- Use only three-part version number for Darwin shared compiles
+- Add rc option to ar in Makefile.in for when ./configure not run
+- Add -Wl,-rpath,. to LDFLAGS for OSF 1 V4*
+- Set LD_LIBRARYN32_PATH for SGI IRIX shared compile
+- Protect against _FILE_OFFSET_BITS being defined when compiling zlib
+- Rename Makefile.in targets allstatic to static and allshared to shared
+- Fix static and shared Makefile.in targets to be independent
+- Correct error return bug in gz_open() by setting state [Brown]
+- Put spaces before ;;'s in configure for better sh compatibility
+- Add pigz.c (parallel implementation of gzip) to examples/
+- Correct constant in crc32.c to UL [Leventhal]
+- Reject negative lengths in crc32_combine()
+- Add inflateReset2() function to work like inflateEnd()/inflateInit2()
+- Include sys/types.h for _LARGEFILE64_SOURCE [Brown]
+- Correct typo in doc/algorithm.txt [Janik]
+- Fix bug in adler32_combine() [Zhu]
+- Catch missing-end-of-block-code error in all inflates and in puff
+    Assures that random input to inflate eventually results in an error
+- Added enough.c (calculation of ENOUGH for inftrees.h) to examples/
+- Update ENOUGH and its usage to reflect discovered bounds
+- Fix gzerror() error report on empty input file [Brown]
+- Add ush casts in trees.c to avoid pedantic runtime errors
+- Fix typo in zlib.h uncompress() description [Reiss]
+- Correct inflate() comments with regard to automatic header detection
+- Remove deprecation comment on Z_PARTIAL_FLUSH (it stays)
+- Put new version of gzlog (2.0) in examples with interruption recovery
+- Add puff compile option to permit invalid distance-too-far streams
+- Add puff TEST command options, ability to read piped input
+- Prototype the *64 functions in zlib.h when _FILE_OFFSET_BITS == 64, but
+  _LARGEFILE64_SOURCE not defined
+- Fix Z_FULL_FLUSH to truly erase the past by resetting s->strstart
+- Fix deflateSetDictionary() to use all 32K for output consistency
+- Remove extraneous #define MIN_LOOKAHEAD in deflate.c (in deflate.h)
+- Clear bytes after deflate lookahead to avoid use of uninitialized data
+- Change a limit in inftrees.c to be more transparent to Coverity Prevent
+- Update win32/zlib.def with exported symbols from zlib.h
+- Correct spelling errors in zlib.h [Willem, Sobrado]
+- Allow Z_BLOCK for deflate() to force a new block
+- Allow negative bits in inflatePrime() to delete existing bit buffer
+- Add Z_TREES flush option to inflate() to return at end of trees
+- Add inflateMark() to return current state information for random access
+- Add Makefile for NintendoDS to contrib [Costa]
+- Add -w in configure compile tests to avoid spurious warnings [Beucler]
+- Fix typos in zlib.h comments for deflateSetDictionary()
+- Fix EOF detection in transparent gzread() [Maier]
+
+Changes in 1.2.3.3 (2 October 2006)
+- Make --shared the default for configure, add a --static option
+- Add compile option to permit invalid distance-too-far streams
+- Add inflateUndermine() function which is required to enable above
+- Remove use of "this" variable name for C++ compatibility [Marquess]
+- Add testing of shared library in make test, if shared library built
+- Use ftello() and fseeko() if available instead of ftell() and fseek()
+- Provide two versions of all functions that use the z_off_t type for
+  binary compatibility -- a normal version and a 64-bit offset version,
+  per the Large File Support Extension when _LARGEFILE64_SOURCE is
+  defined; use the 64-bit versions by default when _FILE_OFFSET_BITS
+  is defined to be 64
+- Add a --uname= option to configure to perhaps help with cross-compiling
+
+Changes in 1.2.3.2 (3 September 2006)
+- Turn off silly Borland warnings [Hay]
+- Use off64_t and define _LARGEFILE64_SOURCE when present
+- Fix missing dependency on inffixed.h in Makefile.in
+- Rig configure --shared to build both shared and static [Teredesai, Truta]
+- Remove zconf.in.h and instead create a new zlibdefs.h file
+- Fix contrib/minizip/unzip.c non-encrypted after encrypted [Vollant]
+- Add treebuild.xml (see http://treebuild.metux.de/) [Weigelt]
+
+Changes in 1.2.3.1 (16 August 2006)
+- Add watcom directory with OpenWatcom make files [Daniel]
+- Remove #undef of FAR in zconf.in.h for MVS [Fedtke]
+- Update make_vms.com [Zinser]
+- Use -fPIC for shared build in configure [Teredesai, Nicholson]
+- Use only major version number for libz.so on IRIX and OSF1 [Reinholdtsen]
+- Use fdopen() (not _fdopen()) for Interix in zutil.h [Bäck]
+- Add some FAQ entries about the contrib directory
+- Update the MVS question in the FAQ
+- Avoid extraneous reads after EOF in gzio.c [Brown]
+- Correct spelling of "successfully" in gzio.c [Randers-Pehrson]
+- Add comments to zlib.h about gzerror() usage [Brown]
+- Set extra flags in gzip header in gzopen() like deflate() does
+- Make configure options more compatible with double-dash conventions
+  [Weigelt]
+- Clean up compilation under Solaris SunStudio cc [Rowe, Reinholdtsen]
+- Fix uninstall target in Makefile.in [Truta]
+- Add pkgconfig support [Weigelt]
+- Use $(DESTDIR) macro in Makefile.in [Reinholdtsen, Weigelt]
+- Replace set_data_type() with a more accurate detect_data_type() in
+  trees.c, according to the txtvsbin.txt document [Truta]
+- Swap the order of #include <stdio.h> and #include "zlib.h" in
+  gzio.c, example.c and minigzip.c [Truta]
+- Shut up annoying VS2005 warnings about standard C deprecation [Rowe,
+  Truta] (where?)
+- Fix target "clean" from win32/Makefile.bor [Truta]
+- Create .pdb and .manifest files in win32/makefile.msc [Ziegler, Rowe]
+- Update zlib www home address in win32/DLL_FAQ.txt [Truta]
+- Update contrib/masmx86/inffas32.asm for VS2005 [Vollant, Van Wassenhove]
+- Enable browse info in the "Debug" and "ASM Debug" configurations in
+  the Visual C++ 6 project, and set (non-ASM) "Debug" as default [Truta]
+- Add pkgconfig support [Weigelt]
+- Add ZLIB_VER_MAJOR, ZLIB_VER_MINOR and ZLIB_VER_REVISION in zlib.h,
+  for use in win32/zlib1.rc [Polushin, Rowe, Truta]
+- Add a document that explains the new text detection scheme to
+  doc/txtvsbin.txt [Truta]
+- Add rfc1950.txt, rfc1951.txt and rfc1952.txt to doc/ [Truta]
+- Move algorithm.txt into doc/ [Truta]
+- Synchronize FAQ with website
+- Fix compressBound(), was low for some pathological cases [Fearnley]
+- Take into account wrapper variations in deflateBound()
+- Set examples/zpipe.c input and output to binary mode for Windows
+- Update examples/zlib_how.html with new zpipe.c (also web site)
+- Fix some warnings in examples/gzlog.c and examples/zran.c (it seems
+  that gcc became pickier in 4.0)
+- Add zlib.map for Linux: "All symbols from zlib-1.1.4 remain
+  un-versioned, the patch adds versioning only for symbols introduced in
+  zlib-1.2.0 or later.  It also declares as local those symbols which are
+  not designed to be exported." [Levin]
+- Update Z_PREFIX list in zconf.in.h, add --zprefix option to configure
+- Do not initialize global static by default in trees.c, add a response
+  NO_INIT_GLOBAL_POINTERS to initialize them if needed [Marquess]
+- Don't use strerror() in gzio.c under WinCE [Yakimov]
+- Don't use errno.h in zutil.h under WinCE [Yakimov]
+- Move arguments for AR to its usage to allow replacing ar [Marot]
+- Add HAVE_VISIBILITY_PRAGMA in zconf.in.h for Mozilla [Randers-Pehrson]
+- Improve inflateInit() and inflateInit2() documentation
+- Fix structure size comment in inflate.h
+- Change configure help option from --h* to --help [Santos]
+
+Changes in 1.2.3 (18 July 2005)
+- Apply security vulnerability fixes to contrib/infback9 as well
+- Clean up some text files (carriage returns, trailing space)
+- Update testzlib, vstudio, masmx64, and masmx86 in contrib [Vollant]
+
+Changes in 1.2.2.4 (11 July 2005)
+- Add inflatePrime() function for starting inflation at bit boundary
+- Avoid some Visual C warnings in deflate.c
+- Avoid more silly Visual C warnings in inflate.c and inftrees.c for 64-bit
+  compile
+- Fix some spelling errors in comments [Betts]
+- Correct inflateInit2() error return documentation in zlib.h
+- Add zran.c example of compressed data random access to examples
+  directory, shows use of inflatePrime()
+- Fix cast for assignments to strm->state in inflate.c and infback.c
+- Fix zlibCompileFlags() in zutil.c to use 1L for long shifts [Oberhumer]
+- Move declarations of gf2 functions to right place in crc32.c [Oberhumer]
+- Add cast in trees.c to avoid a warning [Oberhumer]
+- Avoid some warnings in fitblk.c, gun.c, gzjoin.c in examples [Oberhumer]
+- Update make_vms.com [Zinser]
+- Initialize state->write in inflateReset() since copied in inflate_fast()
+- Be more strict on incomplete code sets in inflate_table() and increase
+  ENOUGH and MAXD -- this repairs a possible security vulnerability for
+  invalid inflate input.  Thanks to Tavis Ormandy and Markus Oberhumer for
+  discovering the vulnerability and providing test cases.
+- Add ia64 support to configure for HP-UX [Smith]
+- Add error return to gzread() for format or i/o error [Levin]
+- Use malloc.h for OS/2 [Necasek]
+
+Changes in 1.2.2.3 (27 May 2005)
+- Replace 1U constants in inflate.c and inftrees.c for 64-bit compile
+- Typecast fread() return values in gzio.c [Vollant]
+- Remove trailing space in minigzip.c outmode (VC++ can't deal with it)
+- Fix crc check bug in gzread() after gzungetc() [Heiner]
+- Add the deflateTune() function to adjust internal compression parameters
+- Add a fast gzip decompressor, gun.c, to examples (use of inflateBack)
+- Remove an incorrect assertion in examples/zpipe.c
+- Add C++ wrapper in infback9.h [Donais]
+- Fix bug in inflateCopy() when decoding fixed codes
+- Note in zlib.h how much deflateSetDictionary() actually uses
+- Remove USE_DICT_HEAD in deflate.c (would mess up inflate if used)
+- Add _WIN32_WCE to define WIN32 in zconf.in.h [Spencer]
+- Don't include stddef.h or errno.h for _WIN32_WCE in zutil.h [Spencer]
+- Add gzdirect() function to indicate transparent reads
+- Update contrib/minizip [Vollant]
+- Fix compilation of deflate.c when both ASMV and FASTEST [Oberhumer]
+- Add casts in crc32.c to avoid warnings [Oberhumer]
+- Add contrib/masmx64 [Vollant]
+- Update contrib/asm586, asm686, masmx86, testzlib, vstudio [Vollant]
+
+Changes in 1.2.2.2 (30 December 2004)
+- Replace structure assignments in deflate.c and inflate.c with zmemcpy to
+  avoid implicit memcpy calls (portability for no-library compilation)
+- Increase sprintf() buffer size in gzdopen() to allow for large numbers
+- Add INFLATE_STRICT to check distances against zlib header
+- Improve WinCE errno handling and comments [Chang]
+- Remove comment about no gzip header processing in FAQ
+- Add Z_FIXED strategy option to deflateInit2() to force fixed trees
+- Add updated make_vms.com [Coghlan], update README
+- Create a new "examples" directory, move gzappend.c there, add zpipe.c,
+  fitblk.c, gzlog.[ch], gzjoin.c, and zlib_how.html.
+- Add FAQ entry and comments in deflate.c on uninitialized memory access
+- Add Solaris 9 make options in configure [Gilbert]
+- Allow strerror() usage in gzio.c for STDC
+- Fix DecompressBuf in contrib/delphi/ZLib.pas [ManChesTer]
+- Update contrib/masmx86/inffas32.asm and gvmat32.asm [Vollant]
+- Use z_off_t for adler32_combine() and crc32_combine() lengths
+- Make adler32() much faster for small len
+- Use OS_CODE in deflate() default gzip header
+
+Changes in 1.2.2.1 (31 October 2004)
+- Allow inflateSetDictionary() call for raw inflate
+- Fix inflate header crc check bug for file names and comments
+- Add deflateSetHeader() and gz_header structure for custom gzip headers
+- Add inflateGetHeader() to retrieve gzip headers
+- Add crc32_combine() and adler32_combine() functions
+- Add alloc_func, free_func, in_func, out_func to Z_PREFIX list
+- Use zstreamp consistently in zlib.h (inflate_back functions)
+- Remove GUNZIP condition from definition of inflate_mode in inflate.h
+  and in contrib/inflate86/inffast.S [Truta, Anderson]
+- Add support for AMD64 in contrib/inflate86/inffas86.c [Anderson]
+- Update projects/README.projects and projects/visualc6 [Truta]
+- Update win32/DLL_FAQ.txt [Truta]
+- Avoid warning under NO_GZCOMPRESS in gzio.c; fix typo [Truta]
+- Deprecate Z_ASCII; use Z_TEXT instead [Truta]
+- Use a new algorithm for setting strm->data_type in trees.c [Truta]
+- Do not define an exit() prototype in zutil.c unless DEBUG defined
+- Remove prototype of exit() from zutil.c, example.c, minigzip.c [Truta]
+- Add comment in zlib.h for Z_NO_FLUSH parameter to deflate()
+- Fix Darwin build version identification [Peterson]
+
+Changes in 1.2.2 (3 October 2004)
+- Update zlib.h comments on gzip in-memory processing
+- Set adler to 1 in inflateReset() to support Java test suite [Walles]
+- Add contrib/dotzlib [Ravn]
+- Update win32/DLL_FAQ.txt [Truta]
+- Update contrib/minizip [Vollant]
+- Move contrib/visual-basic.txt to old/ [Truta]
+- Fix assembler builds in projects/visualc6/ [Truta]
+
+Changes in 1.2.1.2 (9 September 2004)
+- Update INDEX file
+- Fix trees.c to update strm->data_type (no one ever noticed!)
+- Fix bug in error case in inflate.c, infback.c, and infback9.c [Brown]
+- Add "volatile" to crc table flag declaration (for DYNAMIC_CRC_TABLE)
+- Add limited multitasking protection to DYNAMIC_CRC_TABLE
+- Add NO_vsnprintf for VMS in zutil.h [Mozilla]
+- Don't declare strerror() under VMS [Mozilla]
+- Add comment to DYNAMIC_CRC_TABLE to use get_crc_table() to initialize
+- Update contrib/ada [Anisimkov]
+- Update contrib/minizip [Vollant]
+- Fix configure to not hardcode directories for Darwin [Peterson]
+- Fix gzio.c to not return error on empty files [Brown]
+- Fix indentation; update version in contrib/delphi/ZLib.pas and
+  contrib/pascal/zlibpas.pas [Truta]
+- Update mkasm.bat in contrib/masmx86 [Truta]
+- Update contrib/untgz [Truta]
+- Add projects/README.projects [Truta]
+- Add project for MS Visual C++ 6.0 in projects/visualc6 [Cadieux, Truta]
+- Update win32/DLL_FAQ.txt [Truta]
+- Update list of Z_PREFIX symbols in zconf.h [Randers-Pehrson, Truta]
+- Remove an unnecessary assignment to curr in inftrees.c [Truta]
+- Add OS/2 to exe builds in configure [Poltorak]
+- Remove err dummy parameter in zlib.h [Kientzle]
+
+Changes in 1.2.1.1 (9 January 2004)
+- Update email address in README
+- Several FAQ updates
+- Fix a big fat bug in inftrees.c that prevented decoding valid
+  dynamic blocks with only literals and no distance codes --
+  Thanks to "Hot Emu" for the bug report and sample file
+- Add a note to puff.c on no distance codes case.
+
+Changes in 1.2.1 (17 November 2003)
+- Remove a tab in contrib/gzappend/gzappend.c
+- Update some interfaces in contrib for new zlib functions
+- Update zlib version number in some contrib entries
+- Add Windows CE definition for ptrdiff_t in zutil.h [Mai, Truta]
+- Support shared libraries on Hurd and KFreeBSD [Brown]
+- Fix error in NO_DIVIDE option of adler32.c
+
+Changes in 1.2.0.8 (4 November 2003)
+- Update version in contrib/delphi/ZLib.pas and contrib/pascal/zlibpas.pas
+- Add experimental NO_DIVIDE #define in adler32.c
+    - Possibly faster on some processors (let me know if it is)
+- Correct Z_BLOCK to not return on first inflate call if no wrap
+- Fix strm->data_type on inflate() return to correctly indicate EOB
+- Add deflatePrime() function for appending in the middle of a byte
+- Add contrib/gzappend for an example of appending to a stream
+- Update win32/DLL_FAQ.txt [Truta]
+- Delete Turbo C comment in README [Truta]
+- Improve some indentation in zconf.h [Truta]
+- Fix infinite loop on bad input in configure script [Church]
+- Fix gzeof() for concatenated gzip files [Johnson]
+- Add example to contrib/visual-basic.txt [Michael B.]
+- Add -p to mkdir's in Makefile.in [vda]
+- Fix configure to properly detect presence or lack of printf functions
+- Add AS400 support [Monnerat]
+- Add a little Cygwin support [Wilson]
+
+Changes in 1.2.0.7 (21 September 2003)
+- Correct some debug formats in contrib/infback9
+- Cast a type in a debug statement in trees.c
+- Change search and replace delimiter in configure from % to # [Beebe]
+- Update contrib/untgz to 0.2 with various fixes [Truta]
+- Add build support for Amiga [Nikl]
+- Remove some directories in old that have been updated to 1.2
+- Add dylib building for Mac OS X in configure and Makefile.in
+- Remove old distribution stuff from Makefile
+- Update README to point to DLL_FAQ.txt, and add comment on Mac OS X
+- Update links in README
+
+Changes in 1.2.0.6 (13 September 2003)
+- Minor FAQ updates
+- Update contrib/minizip to 1.00 [Vollant]
+- Remove test of gz functions in example.c when GZ_COMPRESS defined [Truta]
+- Update POSTINC comment for 68060 [Nikl]
+- Add contrib/infback9 with deflate64 decoding (unsupported)
+- For MVS define NO_vsnprintf and undefine FAR [van Burik]
+- Add pragma for fdopen on MVS [van Burik]
+
+Changes in 1.2.0.5 (8 September 2003)
+- Add OF to inflateBackEnd() declaration in zlib.h
+- Remember start when using gzdopen in the middle of a file
+- Use internal off_t counters in gz* functions to properly handle seeks
+- Perform more rigorous check for distance-too-far in inffast.c
+- Add Z_BLOCK flush option to return from inflate at block boundary
+- Set strm->data_type on return from inflate
+    - Indicate bits unused, if at block boundary, and if in last block
+- Replace size_t with ptrdiff_t in crc32.c, and check for correct size
+- Add condition so old NO_DEFLATE define still works for compatibility
+- FAQ update regarding the Windows DLL [Truta]
+- INDEX update: add qnx entry, remove aix entry [Truta]
+- Install zlib.3 into mandir [Wilson]
+- Move contrib/zlib_dll_FAQ.txt to win32/DLL_FAQ.txt; update [Truta]
+- Adapt the zlib interface to the new DLL convention guidelines [Truta]
+- Introduce ZLIB_WINAPI macro to allow the export of functions using
+  the WINAPI calling convention, for Visual Basic [Vollant, Truta]
+- Update msdos and win32 scripts and makefiles [Truta]
+- Export symbols by name, not by ordinal, in win32/zlib.def [Truta]
+- Add contrib/ada [Anisimkov]
+- Move asm files from contrib/vstudio/vc70_32 to contrib/asm386 [Truta]
+- Rename contrib/asm386 to contrib/masmx86 [Truta, Vollant]
+- Add contrib/masm686 [Truta]
+- Fix offsets in contrib/inflate86 and contrib/masmx86/inffas32.asm
+  [Truta, Vollant]
+- Update contrib/delphi; rename to contrib/pascal; add example [Truta]
+- Remove contrib/delphi2; add a new contrib/delphi [Truta]
+- Avoid inclusion of the nonstandard <memory.h> in contrib/iostream,
+  and fix some method prototypes [Truta]
+- Fix the ZCR_SEED2 constant to avoid warnings in contrib/minizip
+  [Truta]
+- Avoid the use of backslash (\) in contrib/minizip [Vollant]
+- Fix file time handling in contrib/untgz; update makefiles [Truta]
+- Update contrib/vstudio/vc70_32 to comply with the new DLL guidelines
+  [Vollant]
+- Remove contrib/vstudio/vc15_16 [Vollant]
+- Rename contrib/vstudio/vc70_32 to contrib/vstudio/vc7 [Truta]
+- Update README.contrib [Truta]
+- Invert the assignment order of match_head and s->prev[...] in
+  INSERT_STRING [Truta]
+- Compare TOO_FAR with 32767 instead of 32768, to avoid 16-bit warnings
+  [Truta]
+- Compare function pointers with 0, not with NULL or Z_NULL [Truta]
+- Fix prototype of syncsearch in inflate.c [Truta]
+- Introduce ASMINF macro to be enabled when using an ASM implementation
+  of inflate_fast [Truta]
+- Change NO_DEFLATE to NO_GZCOMPRESS [Truta]
+- Modify test_gzio in example.c to take a single file name as a
+  parameter [Truta]
+- Exit the example.c program if gzopen fails [Truta]
+- Add type casts around strlen in example.c [Truta]
+- Remove casting to sizeof in minigzip.c; give a proper type
+  to the variable compared with SUFFIX_LEN [Truta]
+- Update definitions of STDC and STDC99 in zconf.h [Truta]
+- Synchronize zconf.h with the new Windows DLL interface [Truta]
+- Use SYS16BIT instead of __32BIT__ to distinguish between
+  16- and 32-bit platforms [Truta]
+- Use far memory allocators in small 16-bit memory models for
+  Turbo C [Truta]
+- Add info about the use of ASMV, ASMINF and ZLIB_WINAPI in
+  zlibCompileFlags [Truta]
+- Cygwin has vsnprintf [Wilson]
+- In Windows16, OS_CODE is 0, as in MSDOS [Truta]
+- In Cygwin, OS_CODE is 3 (Unix), not 11 (Windows32) [Wilson]
+
+Changes in 1.2.0.4 (10 August 2003)
+- Minor FAQ updates
+- Be more strict when checking inflateInit2's windowBits parameter
+- Change NO_GUNZIP compile option to NO_GZIP to cover deflate as well
+- Add gzip wrapper option to deflateInit2 using windowBits
+- Add updated QNX rule in configure and qnx directory [Bonnefoy]
+- Make inflate distance-too-far checks more rigorous
+- Clean up FAR usage in inflate
+- Add casting to sizeof() in gzio.c and minigzip.c
+
+Changes in 1.2.0.3 (19 July 2003)
+- Fix silly error in gzungetc() implementation [Vollant]
+- Update contrib/minizip and contrib/vstudio [Vollant]
+- Fix printf format in example.c
+- Correct cdecl support in zconf.in.h [Anisimkov]
+- Minor FAQ updates
+
+Changes in 1.2.0.2 (13 July 2003)
+- Add ZLIB_VERNUM in zlib.h for numerical preprocessor comparisons
+- Attempt to avoid warnings in crc32.c for pointer-int conversion
+- Add AIX to configure, remove aix directory [Bakker]
+- Add some casts to minigzip.c
+- Improve checking after insecure sprintf() or vsprintf() calls
+- Remove #elif's from crc32.c
+- Change leave label to inf_leave in inflate.c and infback.c to avoid
+  library conflicts
+- Remove inflate gzip decoding by default--only enable gzip decoding by
+  special request for stricter backward compatibility
+- Add zlibCompileFlags() function to return compilation information
+- More typecasting in deflate.c to avoid warnings
+- Remove leading underscore from _Capital #defines [Truta]
+- Fix configure to link shared library when testing
+- Add some Windows CE target adjustments [Mai]
+- Remove #define ZLIB_DLL in zconf.h [Vollant]
+- Add zlib.3 [Rodgers]
+- Update RFC URL in deflate.c and algorithm.txt [Mai]
+- Add zlib_dll_FAQ.txt to contrib [Truta]
+- Add UL to some constants [Truta]
+- Update minizip and vstudio [Vollant]
+- Remove vestigial NEED_DUMMY_RETURN from zconf.in.h
+- Expand use of NO_DUMMY_DECL to avoid all dummy structures
+- Added iostream3 to contrib [Schwardt]
+- Replace rewind() with fseek() for WinCE [Truta]
+- Improve setting of zlib format compression level flags
+    - Report 0 for huffman and rle strategies and for level == 0 or 1
+    - Report 2 only for level == 6
+- Only deal with 64K limit when necessary at compile time [Truta]
+- Allow TOO_FAR check to be turned off at compile time [Truta]
+- Add gzclearerr() function [Souza]
+- Add gzungetc() function
+
+Changes in 1.2.0.1 (17 March 2003)
+- Add Z_RLE strategy for run-length encoding [Truta]
+    - When Z_RLE requested, restrict matches to distance one
+    - Update zlib.h, minigzip.c, gzopen(), gzdopen() for Z_RLE
+- Correct FASTEST compilation to allow level == 0
+- Clean up what gets compiled for FASTEST
+- Incorporate changes to zconf.in.h [Vollant]
+    - Refine detection of Turbo C need for dummy returns
+    - Refine ZLIB_DLL compilation
+    - Include additional header file on VMS for off_t typedef
+- Try to use _vsnprintf where it supplants vsprintf [Vollant]
+- Add some casts in inffast.c
+- Enhance comments in zlib.h on what happens if gzprintf() tries to
+  write more than 4095 bytes before compression
+- Remove unused state from inflateBackEnd()
+- Remove exit(0) from minigzip.c, example.c
+- Get rid of all those darn tabs
+- Add "check" target to Makefile.in that does the same thing as "test"
+- Add "mostlyclean" and "maintainer-clean" targets to Makefile.in
+- Update contrib/inflate86 [Anderson]
+- Update contrib/testzlib, contrib/vstudio, contrib/minizip [Vollant]
+- Add msdos and win32 directories with makefiles [Truta]
+- More additions and improvements to the FAQ
+
+Changes in 1.2.0 (9 March 2003)
+- New and improved inflate code
+    - About 20% faster
+    - Does not allocate 32K window unless and until needed
+    - Automatically detects and decompresses gzip streams
+    - Raw inflate no longer needs an extra dummy byte at end
+    - Added inflateBack functions using a callback interface--even faster
+      than inflate, useful for file utilities (gzip, zip)
+    - Added inflateCopy() function to record state for random access on
+      externally generated deflate streams (e.g. in gzip files)
+    - More readable code (I hope)
+- New and improved crc32()
+    - About 50% faster, thanks to suggestions from Rodney Brown
+- Add deflateBound() and compressBound() functions
+- Fix memory leak in deflateInit2()
+- Permit setting dictionary for raw deflate (for parallel deflate)
+- Fix const declaration for gzwrite()
+- Check for some malloc() failures in gzio.c
+- Fix bug in gzopen() on single-byte file 0x1f
+- Fix bug in gzread() on concatenated file with 0x1f at end of buffer
+  and next buffer doesn't start with 0x8b
+- Fix uncompress() to return Z_DATA_ERROR on truncated input
+- Free memory at end of example.c
+- Remove MAX #define in trees.c (conflicted with some libraries)
+- Fix static const's in deflate.c, gzio.c, and zutil.[ch]
+- Declare malloc() and free() in gzio.c if STDC not defined
+- Use malloc() instead of calloc() in zutil.c if int big enough
+- Define STDC for AIX
+- Add aix/ with approach for compiling shared library on AIX
+- Add HP-UX support for shared libraries in configure
+- Add OpenUNIX support for shared libraries in configure
+- Use $cc instead of gcc to build shared library
+- Make prefix directory if needed when installing
+- Correct Macintosh avoidance of typedef Byte in zconf.h
+- Correct Turbo C memory allocation when under Linux
+- Use libz.a instead of -lz in Makefile (assure use of compiled library)
+- Update configure to check for snprintf or vsnprintf functions and their
+  return value, warn during make if using an insecure function
+- Fix configure problem with compile-time knowledge of HAVE_UNISTD_H that
+  is lost when library is used--resolution is to build new zconf.h
+- Documentation improvements (in zlib.h):
+    - Document raw deflate and inflate
+    - Update RFCs URL
+    - Point out that zlib and gzip formats are different
+    - Note that Z_BUF_ERROR is not fatal
+    - Document string limit for gzprintf() and possible buffer overflow
+    - Note requirement on avail_out when flushing
+    - Note permitted values of flush parameter of inflate()
+- Add some FAQs (and even answers) to the FAQ
+- Add contrib/inflate86/ for x86 faster inflate
+- Add contrib/blast/ for PKWare Data Compression Library decompression
+- Add contrib/puff/ simple inflate for deflate format description
+
+Changes in 1.1.4 (11 March 2002)
+- ZFREE was repeated on same allocation on some error conditions.
+  This creates a security problem described in
+  http://www.zlib.org/advisory-2002-03-11.txt
+- Returned incorrect error (Z_MEM_ERROR) on some invalid data
+- Avoid accesses before window for invalid distances with inflate window
+  less than 32K.
+- force windowBits > 8 to avoid a bug in the encoder for a window size
+  of 256 bytes. (A complete fix will be available in 1.1.5).
+
+Changes in 1.1.3 (9 July 1998)
+- fix "an inflate input buffer bug that shows up on rare but persistent
+  occasions" (Mark)
+- fix gzread and gztell for concatenated .gz files (Didier Le Botlan)
+- fix gzseek(..., SEEK_SET) in write mode
+- fix crc check after a gzseek (Frank Faubert)
+- fix miniunzip when the last entry in a zip file is itself a zip file
+  (J Lillge)
+- add contrib/asm586 and contrib/asm686 (Brian Raiter)
+  See http://www.muppetlabs.com/~breadbox/software/assembly.html
+- add support for Delphi 3 in contrib/delphi (Bob Dellaca)
+- add support for C++Builder 3 and Delphi 3 in contrib/delphi2 (Davide Moretti)
+- do not exit prematurely in untgz if 0 at start of block (Magnus Holmgren)
+- use macro EXTERN instead of extern to support DLL for BeOS (Sander Stoks)
+- added a FAQ file
+
+- Support gzdopen on Mac with Metrowerks (Jason Linhart)
+- Do not redefine Byte on Mac (Brad Pettit & Jason Linhart)
+- define SEEK_END too if SEEK_SET is not defined (Albert Chin-A-Young)
+- avoid some warnings with Borland C (Tom Tanner)
+- fix a problem in contrib/minizip/zip.c for 16-bit MSDOS (Gilles Vollant)
+- emulate utime() for WIN32 in contrib/untgz  (Gilles Vollant)
+- allow several arguments to configure (Tim Mooney, Frodo Looijaard)
+- use libdir and includedir in Makefile.in (Tim Mooney)
+- support shared libraries on OSF1 V4 (Tim Mooney)
+- remove so_locations in "make clean"  (Tim Mooney)
+- fix maketree.c compilation error (Glenn, Mark)
+- Python interface to zlib now in Python 1.5 (Jeremy Hylton)
+- new Makefile.riscos (Rich Walker)
+- initialize static descriptors in trees.c for embedded targets (Nick Smith)
+- use "foo-gz" in example.c for RISCOS and VMS (Nick Smith)
+- add the OS/2 files in Makefile.in too (Andrew Zabolotny)
+- fix fdopen and halloc macros for Microsoft C 6.0 (Tom Lane)
+- fix maketree.c to allow clean compilation of inffixed.h (Mark)
+- fix parameter check in deflateCopy (Gunther Nikl)
+- cleanup trees.c, use compressed_len only in debug mode (Christian Spieler)
+- Many portability patches by Christian Spieler:
+  . zutil.c, zutil.h: added "const" for zmem*
+  . Make_vms.com: fixed some typos
+  . Make_vms.com: msdos/Makefile.*: removed zutil.h from some dependency lists
+  . msdos/Makefile.msc: remove "default rtl link library" info from obj files
+  . msdos/Makefile.*: use model-dependent name for the built zlib library
+  . msdos/Makefile.emx, nt/Makefile.emx, nt/Makefile.gcc:
+     new makefiles, for emx (DOS/OS2), emx&rsxnt and mingw32 (Windows 9x / NT)
+- use define instead of typedef for Bytef also for MSC small/medium (Tom Lane)
+- replace __far with _far for better portability (Christian Spieler, Tom Lane)
+- fix test for errno.h in configure (Tim Newsham)
+
+Changes in 1.1.2 (19 March 98)
+- added contrib/minizip, mini zip and unzip based on zlib (Gilles Vollant)
+  See http://www.winimage.com/zLibDll/unzip.html
+- preinitialize the inflate tables for fixed codes, to make the code
+  completely thread safe (Mark)
+- some simplifications and slight speed-up to the inflate code (Mark)
+- fix gzeof on non-compressed files (Allan Schrum)
+- add -std1 option in configure for OSF1 to fix gzprintf (Martin Mokrejs)
+- use default value of 4K for Z_BUFSIZE for 16-bit MSDOS (Tim Wegner + Glenn)
+- added os2/Makefile.def and os2/zlib.def (Andrew Zabolotny)
+- add shared lib support for UNIX_SV4.2MP (MATSUURA Takanori)
+- do not wrap extern "C" around system includes (Tom Lane)
+- mention zlib binding for TCL in README (Andreas Kupries)
+- added amiga/Makefile.pup for Amiga powerUP SAS/C PPC (Andreas Kleinert)
+- allow "make install prefix=..." even after configure (Glenn Randers-Pehrson)
+- allow "configure --prefix $HOME" (Tim Mooney)
+- remove warnings in example.c and gzio.c (Glenn Randers-Pehrson)
+- move Makefile.sas to amiga/Makefile.sas
+
+Changes in 1.1.1 (27 Feb 98)
+- fix macros _tr_tally_* in deflate.h for debug mode  (Glenn Randers-Pehrson)
+- remove block truncation heuristic which had very marginal effect for zlib
+  (smaller lit_bufsize than in gzip 1.2.4) and degraded a little the
+  compression ratio on some files. This also allows inlining _tr_tally for
+  matches in deflate_slow.
+- added msdos/Makefile.w32 for WIN32 Microsoft Visual C++ (Bob Frazier)
+
+Changes in 1.1.0 (24 Feb 98)
+- do not return STREAM_END prematurely in inflate (John Bowler)
+- revert to the zlib 1.0.8 inflate to avoid the gcc 2.8.0 bug (Jeremy Buhler)
+- compile with -DFASTEST to get compression code optimized for speed only
+- in minigzip, try mmap'ing the input file first (Miguel Albrecht)
+- increase size of I/O buffers in minigzip.c and gzio.c (not a big gain
+  on Sun but significant on HP)
+
+- add a pointer to experimental unzip library in README (Gilles Vollant)
+- initialize variable gcc in configure (Chris Herborth)
+
+Changes in 1.0.9 (17 Feb 1998)
+- added gzputs and gzgets functions
+- do not clear eof flag in gzseek (Mark Diekhans)
+- fix gzseek for files in transparent mode (Mark Diekhans)
+- do not assume that vsprintf returns the number of bytes written (Jens Krinke)
+- replace EXPORT with ZEXPORT to avoid conflict with other programs
+- added compress2 in zconf.h, zlib.def, zlib.dnt
+- new asm code from Gilles Vollant in contrib/asm386
+- simplify the inflate code (Mark):
+ . Replace ZALLOC's in huft_build() with single ZALLOC in inflate_blocks_new()
+ . ZALLOC the length list in inflate_trees_fixed() instead of using stack
+ . ZALLOC the value area for huft_build() instead of using stack
+ . Simplify Z_FINISH check in inflate()
+
+- Avoid gcc 2.8.0 comparison bug a little differently than zlib 1.0.8
+- in inftrees.c, avoid cc -O bug on HP (Farshid Elahi)
+- in zconf.h move the ZLIB_DLL stuff earlier to avoid problems with
+  the declaration of FAR (Gilles Vollant)
+- install libz.so* with mode 755 (executable) instead of 644 (Marc Lehmann)
+- read_buf buf parameter of type Bytef* instead of charf*
+- zmemcpy parameters are of type Bytef*, not charf* (Joseph Strout)
+- do not redeclare unlink in minigzip.c for WIN32 (John Bowler)
+- fix check for presence of directories in "make install" (Ian Willis)
+
+Changes in 1.0.8 (27 Jan 1998)
+- fixed offsets in contrib/asm386/gvmat32.asm (Gilles Vollant)
+- fix gzgetc and gzputc for big endian systems (Markus Oberhumer)
+- added compress2() to allow setting the compression level
+- include sys/types.h to get off_t on some systems (Marc Lehmann & QingLong)
+- use constant arrays for the static trees in trees.c instead of computing
+  them at run time (thanks to Ken Raeburn for this suggestion). To create
+  trees.h, compile with GEN_TREES_H and run "make test".
+- check return code of example in "make test" and display result
+- pass minigzip command line options to file_compress
+- simplifying code of inflateSync to avoid gcc 2.8 bug
+
+- support CC="gcc -Wall" in configure -s (QingLong)
+- avoid a flush caused by ftell in gzopen for write mode (Ken Raeburn)
+- fix test for shared library support to avoid compiler warnings
+- zlib.lib -> zlib.dll in msdos/zlib.rc (Gilles Vollant)
+- check for TARGET_OS_MAC in addition to MACOS (Brad Pettit)
+- do not use fdopen for Metrowerks on Mac (Brad Pettit)
+- add checks for gzputc and gzgetc in example.c
+- avoid warnings in gzio.c and deflate.c (Andreas Kleinert)
+- use const for the CRC table (Ken Raeburn)
+- fixed "make uninstall" for shared libraries
+- use Tracev instead of Trace in infblock.c
+- in example.c use correct compressed length for test_sync
+- suppress +vnocompatwarnings in configure for HPUX (not always supported)
+
+Changes in 1.0.7 (20 Jan 1998)
+- fix gzseek which was broken in write mode
+- return error for gzseek to negative absolute position
+- fix configure for Linux (Chun-Chung Chen)
+- increase stack space for MSC (Tim Wegner)
+- get_crc_table and inflateSyncPoint are EXPORTed (Gilles Vollant)
+- define EXPORTVA for gzprintf (Gilles Vollant)
+- added man page zlib.3 (Rick Rodgers)
+- for contrib/untgz, fix makedir() and improve Makefile
+
+- check gzseek in write mode in example.c
+- allocate extra buffer for seeks only if gzseek is actually called
+- avoid signed/unsigned comparisons (Tim Wegner, Gilles Vollant)
+- add inflateSyncPoint in zconf.h
+- fix list of exported functions in nt/zlib.dnt and msdos/zlib.def
+
+Changes in 1.0.6 (19 Jan 1998)
+- add functions gzprintf, gzputc, gzgetc, gztell, gzeof, gzseek, gzrewind and
+  gzsetparams (thanks to Roland Giersig and Kevin Ruland for some of this code)
+- Fix a deflate bug occurring only with compression level 0 (thanks to
+  Andy Buckler for finding this one).
+- In minigzip, pass transparently also the first byte for .Z files.
+- return Z_BUF_ERROR instead of Z_OK if output buffer full in uncompress()
+- check Z_FINISH in inflate (thanks to Marc Schluper)
+- Implement deflateCopy (thanks to Adam Costello)
+- make static libraries by default in configure, add --shared option.
+- move MSDOS or Windows specific files to directory msdos
+- suppress the notion of partial flush to simplify the interface
+  (but the symbol Z_PARTIAL_FLUSH is kept for compatibility with 1.0.4)
+- suppress history buffer provided by application to simplify the interface
+  (this feature was not implemented anyway in 1.0.4)
+- next_in and avail_in must be initialized before calling inflateInit or
+  inflateInit2
+- add EXPORT in all exported functions (for Windows DLL)
+- added Makefile.nt (thanks to Stephen Williams)
+- added the unsupported "contrib" directory:
+   contrib/asm386/ by Gilles Vollant <info@winimage.com>
+        386 asm code replacing longest_match().
+   contrib/iostream/ by Kevin Ruland <kevin@rodin.wustl.edu>
+        A C++ I/O streams interface to the zlib gz* functions
+   contrib/iostream2/  by Tyge Løvset <Tyge.Lovset@cmr.no>
+        Another C++ I/O streams interface
+   contrib/untgz/  by "Pedro A. Aranda Gutiérrez" <paag@tid.es>
+        A very simple tar.gz file extractor using zlib
+   contrib/visual-basic.txt by Carlos Rios <c_rios@sonda.cl>
+        How to use compress(), uncompress() and the gz* functions from VB.
+- pass params -f (filtered data), -h (huffman only), -1 to -9 (compression
+  level) in minigzip (thanks to Tom Lane)
+
+- use const for rommable constants in deflate
+- added test for gzseek and gztell in example.c
+- add undocumented function inflateSyncPoint() (hack for Paul Mackerras)
+- add undocumented function zError to convert error code to string
+  (for Tim Smithers)
+- Allow compilation of gzio with -DNO_DEFLATE to avoid the compression code.
+- Use default memcpy for Symantec MSDOS compiler.
+- Add EXPORT keyword for check_func (needed for Windows DLL)
+- add current directory to LD_LIBRARY_PATH for "make test"
+- create also a link for libz.so.1
+- added support for FUJITSU UXP/DS (thanks to Toshiaki Nomura)
+- use $(SHAREDLIB) instead of libz.so in Makefile.in (for HPUX)
+- added -soname for Linux in configure (Chun-Chung Chen)
+- assign numbers to the exported functions in zlib.def (for Windows DLL)
+- add advice in zlib.h for best usage of deflateSetDictionary
+- work around compiler bug on Atari (cast Z_NULL in call of s->checkfn)
+- allow compilation with ANSI keywords only enabled for TurboC in large model
+- avoid "versionString"[0] (Borland bug)
+- add NEED_DUMMY_RETURN for Borland
+- use variable z_verbose for tracing in debug mode (L. Peter Deutsch).
+- allow compilation with CC
+- defined STDC for OS/2 (David Charlap)
+- limit external names to 8 chars for MVS (Thomas Lund)
+- in minigzip.c, use static buffers only for 16-bit systems
+- fix suffix check for "minigzip -d foo.gz"
+- do not return an error for the 2nd of two consecutive gzflush() (Felix Lee)
+- use _fdopen instead of fdopen for MSC >= 6.0 (Thomas Fanslau)
+- added makelcc.bat for lcc-win32 (Tom St Denis)
+- in Makefile.dj2, use copy and del instead of install and rm (Frank Donahoe)
+- Avoid expanded $Id$. Use "rcs -kb" or "cvs admin -kb" to avoid Id expansion.
+- check for unistd.h in configure (for off_t)
+- remove useless check parameter in inflate_blocks_free
+- avoid useless assignment of s->check to itself in inflate_blocks_new
+- do not flush twice in gzclose (thanks to Ken Raeburn)
+- rename FOPEN as F_OPEN to avoid clash with /usr/include/sys/file.h
+- use NO_ERRNO_H instead of enumeration of operating systems with errno.h
+- work around buggy fclose on pipes for HP/UX
+- support zlib DLL with BORLAND C++ 5.0 (thanks to Glenn Randers-Pehrson)
+- fix configure if CC is already equal to gcc
+
+Changes in 1.0.5 (3 Jan 98)
+- Fix inflate to terminate gracefully when fed corrupted or invalid data
+- Use const for rommable constants in inflate
+- Eliminate memory leaks on error conditions in inflate
+- Removed some vestigial code in inflate
+- Update web address in README
+
+Changes in 1.0.4 (24 Jul 96)
+- In very rare conditions, deflate(s, Z_FINISH) could fail to produce an EOF
+  bit, so the decompressor could decompress all the correct data but went
+  on to attempt decompressing extra garbage data. This affected minigzip too.
+- zlibVersion and gzerror return const char* (needed for DLL)
+- port to RISCOS (no fdopen, no multiple dots, no unlink, no fileno)
+- use z_error only for DEBUG (avoid problem with DLLs)
+
+Changes in 1.0.3 (2 Jul 96)
+- use z_streamp instead of z_stream *, which is now a far pointer in MSDOS
+  small and medium models; this makes the library incompatible with previous
+  versions for these models. (No effect in large model or on other systems.)
+- return OK instead of BUF_ERROR if previous deflate call returned with
+  avail_out as zero but there is nothing to do
+- added memcmp for non STDC compilers
+- define NO_DUMMY_DECL for more Mac compilers (.h files merged incorrectly)
+- define __32BIT__ if __386__ or i386 is defined (pb. with Watcom and SCO)
+- better check for 16-bit mode MSC (avoids problem with Symantec)
+
+Changes in 1.0.2 (23 May 96)
+- added Windows DLL support
+- added a function zlibVersion (for the DLL support)
+- fixed declarations using Bytef in infutil.c (pb with MSDOS medium model)
+- Bytef is define'd instead of typedef'd only for Borland C
+- avoid reading uninitialized memory in example.c
+- mention in README that the zlib format is now RFC1950
+- updated Makefile.dj2
+- added algorithm.doc
+
+Changes in 1.0.1 (20 May 96) [1.0 skipped to avoid confusion]
+- fix array overlay in deflate.c which sometimes caused bad compressed data
+- fix inflate bug with empty stored block
+- fix MSDOS medium model which was broken in 0.99
+- fix deflateParams() which could generate bad compressed data.
+- Bytef is define'd instead of typedef'ed (work around Borland bug)
+- added an INDEX file
+- new makefiles for DJGPP (Makefile.dj2), 32-bit Borland (Makefile.b32),
+  Watcom (Makefile.wat), Amiga SAS/C (Makefile.sas)
+- speed up adler32 for modern machines without auto-increment
+- added -ansi for IRIX in configure
+- static_init_done in trees.c is an int
+- define unlink as delete for VMS
+- fix configure for QNX
+- add configure branch for SCO and HPUX
+- avoid many warnings (unused variables, dead assignments, etc...)
+- no fdopen for BeOS
+- fix the Watcom fix for 32 bit mode (define FAR as empty)
+- removed redefinition of Byte for MWERKS
+- work around an MWERKS bug (incorrect merge of all .h files)
+
+Changes in 0.99 (27 Jan 96)
+- allow preset dictionary shared between compressor and decompressor
+- allow compression level 0 (no compression)
+- add deflateParams in zlib.h: allow dynamic change of compression level
+  and compression strategy.
+- test large buffers and deflateParams in example.c
+- add optional "configure" to build zlib as a shared library
+- suppress Makefile.qnx, use configure instead
+- fixed deflate for 64-bit systems (detected on Cray)
+- fixed inflate_blocks for 64-bit systems (detected on Alpha)
+- declare Z_DEFLATED in zlib.h (possible parameter for deflateInit2)
+- always return Z_BUF_ERROR when deflate() has nothing to do
+- deflateInit and inflateInit are now macros to allow version checking
+- prefix all global functions and types with z_ with -DZ_PREFIX
+- make falloc completely reentrant (inftrees.c)
+- fixed very unlikely race condition in ct_static_init
+- free in reverse order of allocation to help memory manager
+- use zlib-1.0/* instead of zlib/* inside the tar.gz
+- make zlib warning-free with "gcc -O3 -Wall -Wwrite-strings -Wpointer-arith
+  -Wconversion -Wstrict-prototypes -Wmissing-prototypes"
+- allow gzread on concatenated .gz files
+- deflateEnd now returns Z_DATA_ERROR if it was premature
+- deflate is finally (?) fully deterministic (no matches beyond end of input)
+- Document Z_SYNC_FLUSH
+- add uninstall in Makefile
+- Check for __cplusplus in zlib.h
+- Better test in ct_align for partial flush
+- avoid harmless warnings for Borland C++
+- initialize hash_head in deflate.c
+- avoid warning on fdopen (gzio.c) for HP cc -Aa
+- include stdlib.h for STDC compilers
+- include errno.h for Cray
+- ignore error if ranlib doesn't exist
+- call ranlib twice for NeXTSTEP
+- use exec_prefix instead of prefix for libz.a
+- renamed ct_* as _tr_* to avoid conflict with applications
+- clear z->msg in inflateInit2 before any error return
+- initialize opaque in example.c, gzio.c, deflate.c and inflate.c
+- fixed typo in zconf.h (_GNUC__ => __GNUC__)
+- check for WIN32 in zconf.h and zutil.c (avoid farmalloc in 32-bit mode)
+- fix typo in Make_vms.com (f$trnlnm -> f$getsyi)
+- in fcalloc, normalize pointer if size > 65520 bytes
+- don't use special fcalloc for 32 bit Borland C++
+- use STDC instead of __GO32__ to avoid redeclaring exit, calloc, etc...
+- use Z_BINARY instead of BINARY
+- document that gzclose after gzdopen will close the file
+- allow "a" as mode in gzopen.
+- fix error checking in gzread
+- allow skipping .gz extra-field on pipes
+- added reference to Perl interface in README
+- put the crc table in FAR data (I dislike more and more the medium model :)
+- added get_crc_table
+- added a dimension to all arrays (Borland C can't count).
+- workaround Borland C bug in declaration of inflate_codes_new & inflate_fast
+- guard against multiple inclusion of *.h (for precompiled header on Mac)
+- Watcom C pretends to be Microsoft C small model even in 32 bit mode.
+- don't use unsized arrays to avoid silly warnings by Visual C++:
+     warning C4746: 'inflate_mask' : unsized array treated as  '__far'
+     (what's wrong with far data in far model?).
+- define enum out of inflate_blocks_state to allow compilation with C++
+
+Changes in 0.95 (16 Aug 95)
+- fix MSDOS small and medium model (now easier to adapt to any compiler)
+- inlined send_bits
+- fix the final (:-) bug for deflate with flush (output was correct but
+  not completely flushed in rare occasions).
+- default window size is same for compression and decompression
+  (it's now sufficient to set MAX_WBITS in zconf.h).
+- voidp -> voidpf and voidnp -> voidp (for consistency with other
+  typedefs and because voidnp was not near in large model).
+
+Changes in 0.94 (13 Aug 95)
+- support MSDOS medium model
+- fix deflate with flush (could sometimes generate bad output)
+- fix deflateReset (zlib header was incorrectly suppressed)
+- added support for VMS
+- allow a compression level in gzopen()
+- gzflush now calls fflush
+- For deflate with flush, flush even if no more input is provided.
+- rename libgz.a as libz.a
+- avoid complex expression in infcodes.c triggering Turbo C bug
+- work around a problem with gcc on Alpha (in INSERT_STRING)
+- don't use inline functions (problem with some gcc versions)
+- allow renaming of Byte, uInt, etc... with #define.
+- avoid warning about (unused) pointer before start of array in deflate.c
+- avoid various warnings in gzio.c, example.c, infblock.c, adler32.c, zutil.c
+- avoid reserved word 'new' in trees.c
+
+Changes in 0.93 (25 June 95)
+- temporarily disable inline functions
+- make deflate deterministic
+- give enough lookahead for PARTIAL_FLUSH
+- Set binary mode for stdin/stdout in minigzip.c for OS/2
+- don't even use signed char in inflate (not portable enough)
+- fix inflate memory leak for segmented architectures
+
+Changes in 0.92 (3 May 95)
+- don't assume that char is signed (problem on SGI)
+- Clear bit buffer when starting a stored block
+- no memcpy on Pyramid
+- suppressed inftest.c
+- optimized fill_window, put longest_match inline for gcc
+- optimized inflate on stored blocks.
+- untabify all sources to simplify patches
+
+Changes in 0.91 (2 May 95)
+- Default MEM_LEVEL is 8 (not 9 for Unix) as documented in zlib.h
+- Document the memory requirements in zconf.h
+- added "make install"
+- fix sync search logic in inflateSync
+- deflate(Z_FULL_FLUSH) now works even if output buffer too short
+- after inflateSync, don't scare people with just "lo world"
+- added support for DJGPP
+
+Changes in 0.9 (1 May 95)
+- don't assume that zalloc clears the allocated memory (the TurboC bug
+  was Mark's bug after all :)
+- let again gzread copy uncompressed data unchanged (was working in 0.71)
+- deflate(Z_FULL_FLUSH), inflateReset and inflateSync are now fully implemented
+- added a test of inflateSync in example.c
+- moved MAX_WBITS to zconf.h because users might want to change that.
+- document explicitly that zalloc(64K) on MSDOS must return a normalized
+  pointer (zero offset)
+- added Makefiles for Microsoft C, Turbo C, Borland C++
+- faster crc32()
+
+Changes in 0.8 (29 April 95)
+- added fast inflate (inffast.c)
+- deflate(Z_FINISH) now returns Z_STREAM_END when done. Warning: this
+  is incompatible with previous versions of zlib which returned Z_OK.
+- work around a TurboC compiler bug (bad code for b << 0, see infutil.h)
+  (actually that was not a compiler bug, see 0.81 above)
+- gzread no longer reads one extra byte in certain cases
+- In gzio destroy(), don't reference a freed structure
+- avoid many warnings for MSDOS
+- avoid the ERROR symbol which is used by MS Windows
+
+Changes in 0.71 (14 April 95)
+- Fixed more MSDOS compilation problems :( There is still a bug with
+  TurboC large model.
+
+Changes in 0.7 (14 April 95)
+- Added full inflate support.
+- Simplified the crc32() interface. The pre- and post-conditioning
+  (one's complement) is now done inside crc32(). WARNING: this is
+  incompatible with previous versions; see zlib.h for the new usage.
+
+Changes in 0.61 (12 April 95)
+- workaround for a bug in TurboC. example and minigzip now work on MSDOS.
+
+Changes in 0.6 (11 April 95)
+- added minigzip.c
+- added gzdopen to reopen a file descriptor as gzFile
+- added transparent reading of non-gzipped files in gzread.
+- fixed bug in gzread (don't read crc as data)
+- fixed bug in destroy (gzio.c) (don't return Z_STREAM_END for gzclose).
+- don't allocate big arrays in the stack (for MSDOS)
+- fix some MSDOS compilation problems
+
+Changes in 0.5:
+- do real compression in deflate.c. Z_PARTIAL_FLUSH is supported but
+  not yet Z_FULL_FLUSH.
+- support decompression but only in a single step (forced Z_FINISH)
+- added opaque object for zalloc and zfree.
+- added deflateReset and inflateReset
+- added a variable zlib_version for consistency checking.
+- renamed the 'filter' parameter of deflateInit2 as 'strategy'.
+  Added Z_FILTERED and Z_HUFFMAN_ONLY constants.
+
+Changes in 0.4:
+- avoid "zip" everywhere, use zlib instead of ziplib.
+- suppress Z_BLOCK_FLUSH, interpret Z_PARTIAL_FLUSH as block flush
+  if compression method == 8.
+- added adler32 and crc32
+- renamed deflateOptions as deflateInit2, call one or the other but not both
+- added the method parameter for deflateInit2.
+- added inflateInit2
+- simplified considerably deflateInit and inflateInit by not supporting
+  user-provided history buffer. This is supported only in deflateInit2
+  and inflateInit2.
+
+Changes in 0.3:
+- prefix all macro names with Z_
+- use Z_FINISH instead of deflateEnd to finish compression.
+- added Z_HUFFMAN_ONLY
+- added gzerror()
diff --git a/libraries/zlib/FAQ b/libraries/zlib/FAQ
new file mode 100644
index 000000000..99b7cf92e
--- /dev/null
+++ b/libraries/zlib/FAQ
@@ -0,0 +1,368 @@
+
+                Frequently Asked Questions about zlib
+
+
+If your question is not there, please check the zlib home page
+http://zlib.net/ which may have more recent information.
+The latest zlib FAQ is at http://zlib.net/zlib_faq.html
+
+
+ 1. Is zlib Y2K-compliant?
+
+    Yes. zlib doesn't handle dates.
+
+ 2. Where can I get a Windows DLL version?
+
+    The zlib sources can be compiled without change to produce a DLL.  See the
+    file win32/DLL_FAQ.txt in the zlib distribution.  Pointers to the
+    precompiled DLL are found in the zlib web site at http://zlib.net/ .
+
+ 3. Where can I get a Visual Basic interface to zlib?
+
+    See
+        * http://marknelson.us/1997/01/01/zlib-engine/
+        * win32/DLL_FAQ.txt in the zlib distribution
+
+ 4. compress() returns Z_BUF_ERROR.
+
+    Make sure that before the call of compress(), the length of the compressed
+    buffer is equal to the available size of the compressed buffer and not
+    zero.  For Visual Basic, check that this parameter is passed by reference
+    ("as any"), not by value ("as long").
+
+ 5. deflate() or inflate() returns Z_BUF_ERROR.
+
+    Before making the call, make sure that avail_in and avail_out are not zero.
+    When setting the parameter flush equal to Z_FINISH, also make sure that
+    avail_out is big enough to allow processing all pending input.  Note that a
+    Z_BUF_ERROR is not fatal--another call to deflate() or inflate() can be
+    made with more input or output space.  A Z_BUF_ERROR may in fact be
+    unavoidable depending on how the functions are used, since it is not
+    possible to tell whether or not there is more output pending when
+    strm.avail_out returns with zero.  See http://zlib.net/zlib_how.html for a
+    heavily annotated example.
+
+ 6. Where's the zlib documentation (man pages, etc.)?
+
+    It's in zlib.h .  Examples of zlib usage are in the files test/example.c
+    and test/minigzip.c, with more in examples/ .
+
+ 7. Why don't you use GNU autoconf or libtool or ...?
+
+    Because we would like to keep zlib as a very small and simple package.
+    zlib is rather portable and doesn't need much configuration.
+
+ 8. I found a bug in zlib.
+
+    Most of the time, such problems are due to an incorrect usage of zlib.
+    Please try to reproduce the problem with a small program and send the
+    corresponding source to us at zlib@gzip.org .  Do not send multi-megabyte
+    data files without prior agreement.
+
+ 9. Why do I get "undefined reference to gzputc"?
+
+    If "make test" produces something like
+
+       example.o(.text+0x154): undefined reference to `gzputc'
+
+    check that you don't have old files libz.* in /usr/lib, /usr/local/lib or
+    /usr/X11R6/lib. Remove any old versions, then do "make install".
+
+10. I need a Delphi interface to zlib.
+
+    See the contrib/delphi directory in the zlib distribution.
+
+11. Can zlib handle .zip archives?
+
+    Not by itself, no.  See the directory contrib/minizip in the zlib
+    distribution.
+
+12. Can zlib handle .Z files?
+
+    No, sorry.  You have to spawn an uncompress or gunzip subprocess, or adapt
+    the code of uncompress on your own.
+
+13. How can I make a Unix shared library?
+
+    By default a shared (and a static) library is built for Unix.  So:
+
+    make distclean
+    ./configure
+    make
+
+14. How do I install a shared zlib library on Unix?
+
+    After the above, then:
+
+    make install
+
+    However, many flavors of Unix come with a shared zlib already installed.
+    Before going to the trouble of compiling a shared version of zlib and
+    trying to install it, you may want to check if it's already there!  If you
+    can #include <zlib.h>, it's there.  The -lz option will probably link to
+    it.  You can check the version at the top of zlib.h or with the
+    ZLIB_VERSION symbol defined in zlib.h .
+
+15. I have a question about OttoPDF.
+
+    We are not the authors of OttoPDF. The real author is on the OttoPDF web
+    site: Joel Hainley, jhainley@myndkryme.com.
+
+16. Can zlib decode Flate data in an Adobe PDF file?
+
+    Yes. See http://www.pdflib.com/ . To modify PDF forms, see
+    http://sourceforge.net/projects/acroformtool/ .
+
+17. Why am I getting this "register_frame_info not found" error on Solaris?
+
+    After installing zlib 1.1.4 on Solaris 2.6, running applications using zlib
+    generates an error such as:
+
+        ld.so.1: rpm: fatal: relocation error: file /usr/local/lib/libz.so:
+        symbol __register_frame_info: referenced symbol not found
+
+    The symbol __register_frame_info is not part of zlib, it is generated by
+    the C compiler (cc or gcc).  You must recompile applications using zlib
+    which have this problem.  This problem is specific to Solaris.  See
+    http://www.sunfreeware.com for Solaris versions of zlib and applications
+    using zlib.
+
+18. Why does gzip give an error on a file I make with compress/deflate?
+
+    The compress and deflate functions produce data in the zlib format, which
+    is different and incompatible with the gzip format.  The gz* functions in
+    zlib on the other hand use the gzip format.  Both the zlib and gzip formats
+    use the same compressed data format internally, but have different headers
+    and trailers around the compressed data.
+
+19. Ok, so why are there two different formats?
+
+    The gzip format was designed to retain the directory information about a
+    single file, such as the name and last modification date.  The zlib format
+    on the other hand was designed for in-memory and communication channel
+    applications, and has a much more compact header and trailer and uses a
+    faster integrity check than gzip.
+
+20. Well that's nice, but how do I make a gzip file in memory?
+
+    You can request that deflate write the gzip format instead of the zlib
+    format using deflateInit2().  You can also request that inflate decode the
+    gzip format using inflateInit2().  Read zlib.h for more details.
+
+21. Is zlib thread-safe?
+
+    Yes.  However any library routines that zlib uses and any application-
+    provided memory allocation routines must also be thread-safe.  zlib's gz*
+    functions use stdio library routines, and most of zlib's functions use the
+    library memory allocation routines by default.  zlib's *Init* functions
+    allow for the application to provide custom memory allocation routines.
+
+    Of course, you should only operate on any given zlib or gzip stream from a
+    single thread at a time.
+
+22. Can I use zlib in my commercial application?
+
+    Yes.  Please read the license in zlib.h.
+
+23. Is zlib under the GNU license?
+
+    No.  Please read the license in zlib.h.
+
+24. The license says that altered source versions must be "plainly marked". So
+    what exactly do I need to do to meet that requirement?
+
+    You need to change the ZLIB_VERSION and ZLIB_VERNUM #defines in zlib.h.  In
+    particular, the final version number needs to be changed to "f", and an
+    identification string should be appended to ZLIB_VERSION.  Version numbers
+    x.x.x.f are reserved for modifications to zlib by others than the zlib
+    maintainers.  For example, if the version of the base zlib you are altering
+    is "1.2.3.4", then in zlib.h you should change ZLIB_VERNUM to 0x123f, and
+    ZLIB_VERSION to something like "1.2.3.f-zachary-mods-v3".  You can also
+    update the version strings in deflate.c and inftrees.c.
+
+    For altered source distributions, you should also note the origin and
+    nature of the changes in zlib.h, as well as in ChangeLog and README, along
+    with the dates of the alterations.  The origin should include at least your
+    name (or your company's name), and an email address to contact for help or
+    issues with the library.
+
+    Note that distributing a compiled zlib library along with zlib.h and
+    zconf.h is also a source distribution, and so you should change
+    ZLIB_VERSION and ZLIB_VERNUM and note the origin and nature of the changes
+    in zlib.h as you would for a full source distribution.
+
+25. Will zlib work on a big-endian or little-endian architecture, and can I
+    exchange compressed data between them?
+
+    Yes and yes.
+
+26. Will zlib work on a 64-bit machine?
+
+    Yes.  It has been tested on 64-bit machines, and has no dependence on any
+    data types being limited to 32 bits in length.  If you have any
+    difficulties, please provide a complete problem report to zlib@gzip.org .
+
+27. Will zlib decompress data from the PKWare Data Compression Library?
+
+    No.  The PKWare DCL uses a completely different compressed data format than
+    do PKZIP and zlib.  However, you can look in zlib's contrib/blast
+    directory for a possible solution to your problem.
+
+28. Can I access data randomly in a compressed stream?
+
+    No, not without some preparation.  If when compressing you periodically use
+    Z_FULL_FLUSH, carefully write all the pending data at those points, and
+    keep an index of those locations, then you can start decompression at those
+    points.  You have to be careful to not use Z_FULL_FLUSH too often, since it
+    can significantly degrade compression.  Alternatively, you can scan a
+    deflate stream once to generate an index, and then use that index for
+    random access.  See examples/zran.c .
+
+29. Does zlib work on MVS, OS/390, CICS, etc.?
+
+    It has in the past, but we have not heard of any recent evidence.  There
+    were working ports of zlib 1.1.4 to MVS, but those links no longer work.
+    If you know of recent, successful applications of zlib on these operating
+    systems, please let us know.  Thanks.
+
+30. Is there some simpler, easier to read version of inflate I can look at to
+    understand the deflate format?
+
+    First off, you should read RFC 1951.  Second, yes.  Look in zlib's
+    contrib/puff directory.
+
+31. Does zlib infringe on any patents?
+
+    As far as we know, no.  In fact, that was originally the whole point behind
+    zlib.  Look here for some more information:
+
+    http://www.gzip.org/#faq11
+
+32. Can zlib work with greater than 4 GB of data?
+
+    Yes.  inflate() and deflate() will process any amount of data correctly.
+    Each call of inflate() or deflate() is limited to input and output chunks
+    of the maximum value that can be stored in the compiler's "unsigned int"
+    type, but there is no limit to the number of chunks.  Note however that the
+    strm.total_in and strm.total_out counters may be limited to 4 GB.  These
+    counters are provided as a convenience and are not used internally by
+    inflate() or deflate().  The application can easily set up its own counters
+    updated after each call of inflate() or deflate() to count beyond 4 GB.
+    compress() and uncompress() may be limited to 4 GB, since they operate in a
+    single call.  gzseek() and gztell() may be limited to 4 GB depending on how
+    zlib is compiled.  See the zlibCompileFlags() function in zlib.h.
+
+    The word "may" appears several times above since there is a 4 GB limit only
+    if the compiler's "long" type is 32 bits.  If the compiler's "long" type is
+    64 bits, then the limit is 16 exabytes.
+
+33. Does zlib have any security vulnerabilities?
+
+    The only one that we are aware of is potentially in gzprintf().  If zlib is
+    compiled to use sprintf() or vsprintf(), then there is no protection
+    against a buffer overflow of an 8K string space (or other value as set by
+    gzbuffer()), other than the caller of gzprintf() assuring that the output
+    will not exceed 8K.  On the other hand, if zlib is compiled to use
+    snprintf() or vsnprintf(), which should normally be the case, then there is
+    no vulnerability.  The ./configure script will display warnings if an
+    insecure variation of sprintf() will be used by gzprintf().  Also the
+    zlibCompileFlags() function will return information on what variant of
+    sprintf() is used by gzprintf().
+
+    If you don't have snprintf() or vsnprintf() and would like one, you can
+    find a portable implementation here:
+
+        http://www.ijs.si/software/snprintf/
+
+    Note that you should be using the most recent version of zlib.  Versions
+    1.1.3 and before were subject to a double-free vulnerability, and versions
+    1.2.1 and 1.2.2 were subject to an access exception when decompressing
+    invalid compressed data.
+
+34. Is there a Java version of zlib?
+
+    Probably what you want is to use zlib in Java. zlib is already included
+    as part of the Java SDK in the java.util.zip package. If you really want
+    a version of zlib written in the Java language, look on the zlib home
+    page for links: http://zlib.net/ .
+
+35. I get this or that compiler or source-code scanner warning when I crank it
+    up to maximally-pedantic. Can't you guys write proper code?
+
+    Many years ago, we gave up attempting to avoid warnings on every compiler
+    in the universe.  It just got to be a waste of time, and some compilers
+    were downright silly and also contradicted each other.  So now, we simply
+    make sure that the code always works.
+
+36. Valgrind (or some similar memory access checker) says that deflate is
+    performing a conditional jump that depends on an uninitialized value.
+    Isn't that a bug?
+
+    No.  That is intentional for performance reasons, and the output of deflate
+    is not affected.  This only started showing up recently since zlib 1.2.x
+    uses malloc() by default for allocations, whereas earlier versions used
+    calloc(), which zeros out the allocated memory.  Even though the code was
+    correct, versions 1.2.4 and later were changed to not stimulate these
+    checkers.
+
+37. Will zlib read the (insert any ancient or arcane format here) compressed
+    data format?
+
+    Probably not. Look in the comp.compression FAQ for pointers to various
+    formats and associated software.
+
+38. How can I encrypt/decrypt zip files with zlib?
+
+    zlib doesn't support encryption.  The original PKZIP encryption is very
+    weak and can be broken with freely available programs.  To get strong
+    encryption, use GnuPG, http://www.gnupg.org/ , which already includes zlib
+    compression.  For PKZIP compatible "encryption", look at
+    http://www.info-zip.org/
+
+39. What's the difference between the "gzip" and "deflate" HTTP 1.1 encodings?
+
+    "gzip" is the gzip format, and "deflate" is the zlib format.  They should
+    probably have called the second one "zlib" instead to avoid confusion with
+    the raw deflate compressed data format.  While the HTTP 1.1 RFC 2616
+    correctly points to the zlib specification in RFC 1950 for the "deflate"
+    transfer encoding, there have been reports of servers and browsers that
+    incorrectly produce or expect raw deflate data per the deflate
+    specification in RFC 1951, most notably Microsoft.  So even though the
+    "deflate" transfer encoding using the zlib format would be the more
+    efficient approach (and in fact exactly what the zlib format was designed
+    for), using the "gzip" transfer encoding is probably more reliable due to
+    an unfortunate choice of name on the part of the HTTP 1.1 authors.
+
+    Bottom line: use the gzip format for HTTP 1.1 encoding.
+
+40. Does zlib support the new "Deflate64" format introduced by PKWare?
+
+    No.  PKWare has apparently decided to keep that format proprietary, since
+    they have not documented it as they have previous compression formats.  In
+    any case, the compression improvements are so modest compared to other more
+    modern approaches, that it's not worth the effort to implement.
+
+41. I'm having a problem with the zip functions in zlib, can you help?
+
+    There are no zip functions in zlib.  You are probably using minizip by
+    Gilles Vollant, which is found in the contrib directory of zlib.  It is not
+    part of zlib.  In fact none of the stuff in contrib is part of zlib.  The
+    files in there are not supported by the zlib authors.  You need to contact
+    the authors of the respective contribution for help.
+
+42. The match.asm code in contrib is under the GNU General Public License.
+    Since it's part of zlib, doesn't that mean that all of zlib falls under the
+    GNU GPL?
+
+    No.  The files in contrib are not part of zlib.  They were contributed by
+    other authors and are provided as a convenience to the user within the zlib
+    distribution.  Each item in contrib has its own license.
+
+43. Is zlib subject to export controls?  What is its ECCN?
+
+    zlib is not subject to export controls, and so is classified as EAR99.
+
+44. Can you please sign these lengthy legal documents and fax them back to us
+    so that we can use your software in our product?
+
+    No. Go away. Shoo.
diff --git a/libraries/zlib/INDEX b/libraries/zlib/INDEX
new file mode 100644
index 000000000..2ba064120
--- /dev/null
+++ b/libraries/zlib/INDEX
@@ -0,0 +1,68 @@
+CMakeLists.txt  cmake build file
+ChangeLog       history of changes
+FAQ             Frequently Asked Questions about zlib
+INDEX           this file
+Makefile        dummy Makefile that tells you to ./configure
+Makefile.in     template for Unix Makefile
+README          guess what
+configure       configure script for Unix
+make_vms.com    makefile for VMS
+test/example.c  zlib usage examples for build testing
+test/minigzip.c minimal gzip-like functionality for build testing
+test/infcover.c inf*.c code coverage for build coverage testing
+treebuild.xml   XML description of source file dependencies
+zconf.h.cmakein zconf.h template for cmake
+zconf.h.in      zconf.h template for configure
+zlib.3          Man page for zlib
+zlib.3.pdf      Man page in PDF format
+zlib.map        Linux symbol information
+zlib.pc.in      Template for pkg-config descriptor
+zlib.pc.cmakein zlib.pc template for cmake
+zlib2ansi       perl script to convert source files for C++ compilation
+
+amiga/          makefiles for Amiga SAS C
+as400/          makefiles for AS/400
+doc/            documentation for formats and algorithms
+msdos/          makefiles for MSDOS
+nintendods/     makefile for Nintendo DS
+old/            makefiles for various architectures and zlib documentation
+                files that have not yet been updated for zlib 1.2.x
+qnx/            makefiles for QNX
+watcom/         makefiles for OpenWatcom
+win32/          makefiles for Windows
+
+                zlib public header files (required for library use):
+zconf.h
+zlib.h
+
+                private source files used to build the zlib library:
+adler32.c
+compress.c
+crc32.c
+crc32.h
+deflate.c
+deflate.h
+gzclose.c
+gzguts.h
+gzlib.c
+gzread.c
+gzwrite.c
+infback.c
+inffast.c
+inffast.h
+inffixed.h
+inflate.c
+inflate.h
+inftrees.c
+inftrees.h
+trees.c
+trees.h
+uncompr.c
+zutil.c
+zutil.h
+
+                source files for sample programs
+See examples/README.examples
+
+                unsupported contributions by third parties
+See contrib/README.contrib
diff --git a/libraries/zlib/README b/libraries/zlib/README
new file mode 100644
index 000000000..51106de47
--- /dev/null
+++ b/libraries/zlib/README
@@ -0,0 +1,115 @@
+ZLIB DATA COMPRESSION LIBRARY
+
+zlib 1.2.11 is a general purpose data compression library.  All the code is
+thread safe.  The data format used by the zlib library is described by RFCs
+(Request for Comments) 1950 to 1952 in the files
+http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and
+rfc1952 (gzip format).
+
+All functions of the compression library are documented in the file zlib.h
+(volunteers to write man pages welcome, contact zlib@gzip.org).  A usage example
+of the library is given in the file test/example.c which also tests that
+the library is working correctly.  Another example is given in the file
+test/minigzip.c.  The compression library itself is composed of all source
+files in the root directory.
+
+To compile all files and run the test program, follow the instructions given at
+the top of Makefile.in.  In short "./configure; make test", and if that goes
+well, "make install" should work for most flavors of Unix.  For Windows, use
+one of the special makefiles in win32/ or contrib/vstudio/ .  For VMS, use
+make_vms.com.
+
+Questions about zlib should be sent to <zlib@gzip.org>, or to Gilles Vollant
+<info@winimage.com> for the Windows DLL version.  The zlib home page is
+http://zlib.net/ .  Before reporting a problem, please check this site to
+verify that you have the latest version of zlib; otherwise get the latest
+version and check whether the problem still exists or not.
+
+PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help.
+
+Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan.  1997
+issue of Dr.  Dobb's Journal; a copy of the article is available at
+http://marknelson.us/1997/01/01/zlib-engine/ .
+
+The changes made in version 1.2.11 are documented in the file ChangeLog.
+
+Unsupported third party contributions are provided in directory contrib/ .
+
+zlib is available in Java using the java.util.zip package, documented at
+http://java.sun.com/developer/technicalArticles/Programming/compression/ .
+
+A Perl interface to zlib written by Paul Marquess <pmqs@cpan.org> is available
+at CPAN (Comprehensive Perl Archive Network) sites, including
+http://search.cpan.org/~pmqs/IO-Compress-Zlib/ .
+
+A Python interface to zlib written by A.M. Kuchling <amk@amk.ca> is
+available in Python 1.5 and later versions, see
+http://docs.python.org/library/zlib.html .
+
+zlib is built into tcl: http://wiki.tcl.tk/4610 .
+
+An experimental package to read and write files in .zip format, written on top
+of zlib by Gilles Vollant <info@winimage.com>, is available in the
+contrib/minizip directory of zlib.
+
+
+Notes for some targets:
+
+- For Windows DLL versions, please see win32/DLL_FAQ.txt
+
+- For 64-bit Irix, deflate.c must be compiled without any optimization. With
+  -O, one libpng test fails. The test works in 32 bit mode (with the -n32
+  compiler flag). The compiler bug has been reported to SGI.
+
+- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1; it
+  works when compiled with cc.
+
+- On Digital Unix 4.0D (formerly OSF/1) on AlphaServer, the cc option -std1 is
+  necessary to get gzprintf working correctly. This is done by configure.
+
+- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with
+  other compilers. Use "make test" to check your compiler.
+
+- gzdopen is not supported on RISCOS or BEOS.
+
+- For PalmOs, see http://palmzlib.sourceforge.net/
+
+
+Acknowledgments:
+
+  The deflate format used by zlib was defined by Phil Katz.  The deflate and
+  zlib specifications were written by L.  Peter Deutsch.  Thanks to all the
+  people who reported problems and suggested various improvements in zlib; they
+  are too numerous to cite here.
+
+Copyright notice:
+
+ (C) 1995-2017 Jean-loup Gailly and Mark Adler
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Jean-loup Gailly        Mark Adler
+  jloup@gzip.org          madler@alumni.caltech.edu
+
+If you use the zlib library in a product, we would appreciate *not* receiving
+lengthy legal documents to sign.  The sources are provided for free but without
+warranty of any kind.  The library has been entirely written by Jean-loup
+Gailly and Mark Adler; it does not include third-party code.
+
+If you redistribute modified sources, we would appreciate that you include in
+the file ChangeLog history information documenting your changes.  Please read
+the FAQ for more information on the distribution of modified source versions.
diff --git a/libraries/zlib/adler32.c b/libraries/zlib/adler32.c
new file mode 100644
index 000000000..d0be4380a
--- /dev/null
+++ b/libraries/zlib/adler32.c
@@ -0,0 +1,186 @@
+/* adler32.c -- compute the Adler-32 checksum of a data stream
+ * Copyright (C) 1995-2011, 2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#include "zutil.h"
+
+local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
+
+#define BASE 65521U     /* largest prime smaller than 65536 */
+#define NMAX 5552       /* bytes summed between two modulo reductions */
+/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+
+#define DO1(buf,i)  {adler += (buf)[i]; sum2 += adler;} /* add byte i to both sums */
+#define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
+#define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
+#define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
+#define DO16(buf)   DO8(buf,0); DO8(buf,8); /* 16 unrolled DO1 steps; uses locals adler, sum2 */
+
+/* use NO_DIVIDE if your processor does not do division in hardware --
+   try it both ways to see which is faster */
+#ifdef NO_DIVIDE    /* reductions built from shifts, adds and subtracts only */
+/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
+   (thank you to John Reiser for pointing this out) */
+#  define CHOP(a) \
+    do { \
+        unsigned long tmp = a >> 16; \
+        a &= 0xffffUL; \
+        a += (tmp << 4) - tmp; \
+    } while (0)     /* a becomes (a & 0xffff) + 15 * (a >> 16) */
+#  define MOD28(a) \
+    do { \
+        CHOP(a); \
+        if (a >= BASE) a -= BASE; \
+    } while (0)     /* one CHOP, then at most one subtraction of BASE */
+#  define MOD(a) \
+    do { \
+        CHOP(a); \
+        MOD28(a); \
+    } while (0)     /* CHOP twice (second one inside MOD28), then conditional subtract */
+#  define MOD63(a) \
+    do { /* this assumes a is not negative */ \
+        z_off64_t tmp = a >> 32; \
+        a &= 0xffffffffL; \
+        a += (tmp << 8) - (tmp << 5) + tmp; \
+        tmp = a >> 16; \
+        a &= 0xffffL; \
+        a += (tmp << 4) - tmp; \
+        tmp = a >> 16; \
+        a &= 0xffffL; \
+        a += (tmp << 4) - tmp; \
+        if (a >= BASE) a -= BASE; \
+    } while (0)     /* first folds bits 32 and up as 225 * (a >> 32), 225 == 15 * 15 */
+#else               /* plain remainder: all three variants are a %= BASE */
+#  define MOD(a) a %= BASE
+#  define MOD28(a) a %= BASE
+#  define MOD63(a) a %= BASE
+#endif              /* NO_DIVIDE */
+
+/* ========================================================================= */
+uLong ZEXPORT adler32_z(adler, buf, len)    /* fold buf[0..len-1] into adler */
+    uLong adler;        /* running value: low 16 bits byte sum, high 16 bits sum2 */
+    const Bytef *buf;   /* input; Z_NULL returns 1, but is not checked when len == 1 */
+    z_size_t len;       /* number of bytes to read from buf */
+{
+    unsigned long sum2; /* high half: running total of the adler values */
+    unsigned n;         /* 16-byte groups left in the current NMAX block */
+
+    /* split Adler-32 into component sums */
+    sum2 = (adler >> 16) & 0xffff;
+    adler &= 0xffff;
+
+    /* in case user likes doing a byte at a time, keep it fast */
+    if (len == 1) {
+        adler += buf[0];
+        if (adler >= BASE)
+            adler -= BASE;
+        sum2 += adler;
+        if (sum2 >= BASE)
+            sum2 -= BASE;
+        return adler | (sum2 << 16);
+    }
+
+    /* initial Adler-32 value (deferred check for len == 1 speed) */
+    if (buf == Z_NULL)
+        return 1L;
+
+    /* in case short lengths are provided, keep it somewhat fast */
+    if (len < 16) {
+        while (len--) {
+            adler += *buf++;
+            sum2 += adler;
+        }
+        if (adler >= BASE)
+            adler -= BASE;
+        MOD28(sum2);            /* only added so many BASE's */
+        return adler | (sum2 << 16);
+    }
+
+    /* do length NMAX blocks -- requires just one modulo operation */
+    while (len >= NMAX) {
+        len -= NMAX;
+        n = NMAX / 16;          /* NMAX is divisible by 16 */
+        do {
+            DO16(buf);          /* 16 sums unrolled */
+            buf += 16;
+        } while (--n);
+        MOD(adler);
+        MOD(sum2);
+    }
+
+    /* do remaining bytes (less than NMAX, still just one modulo) */
+    if (len) {                  /* avoid modulos if none remaining */
+        while (len >= 16) {
+            len -= 16;
+            DO16(buf);
+            buf += 16;
+        }
+        while (len--) {         /* trailing bytes, fewer than 16 */
+            adler += *buf++;
+            sum2 += adler;
+        }
+        MOD(adler);
+        MOD(sum2);
+    }
+
+    /* return recombined sums */
+    return adler | (sum2 << 16);
+}
+
+/* ========================================================================= */
+uLong ZEXPORT adler32(adler, buf, len)      /* uInt-length entry point */
+    uLong adler;
+    const Bytef *buf;
+    uInt len;
+{
+    return adler32_z(adler, buf, len);      /* all work is done by adler32_z() */
+}
+
+/* ========================================================================= */
+local uLong adler32_combine_(adler1, adler2, len2)  /* merge two Adler-32 values */
+    uLong adler1;
+    uLong adler2;
+    z_off64_t len2;     /* presumably the byte count behind adler2 -- confirm in zlib.h */
+{
+    unsigned long sum1; /* low half of the result */
+    unsigned long sum2; /* high half of the result */
+    unsigned rem;       /* len2 after reduction by MOD63 */
+
+    /* for negative len, return invalid adler32 as a clue for debugging */
+    if (len2 < 0)
+        return 0xffffffffUL;
+
+    /* the derivation of this formula is left as an exercise for the reader */
+    MOD63(len2);                /* assumes len2 >= 0 */
+    rem = (unsigned)len2;
+    sum1 = adler1 & 0xffff;
+    sum2 = rem * sum1;
+    MOD(sum2);
+    sum1 += (adler2 & 0xffff) + BASE - 1;
+    sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
+    if (sum1 >= BASE) sum1 -= BASE;     /* repeated conditional subtraction, no division */
+    if (sum1 >= BASE) sum1 -= BASE;
+    if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1);
+    if (sum2 >= BASE) sum2 -= BASE;
+    return sum1 | (sum2 << 16);         /* same layout as adler32_z(): low | high << 16 */
+}
+
+/* ========================================================================= */
+uLong ZEXPORT adler32_combine(adler1, adler2, len2)     /* z_off_t front end */
+    uLong adler1;
+    uLong adler2;
+    z_off_t len2;
+{
+    return adler32_combine_(adler1, adler2, len2);      /* len2 is passed on as z_off64_t */
+}
+
+uLong ZEXPORT adler32_combine64(adler1, adler2, len2)   /* z_off64_t front end */
+    uLong adler1;
+    uLong adler2;
+    z_off64_t len2;
+{
+    return adler32_combine_(adler1, adler2, len2);      /* shared implementation */
+}
diff --git a/libraries/zlib/algorithm.txt b/libraries/zlib/algorithm.txt
new file mode 100644
index 000000000..c97f49502
--- /dev/null
+++ b/libraries/zlib/algorithm.txt
@@ -0,0 +1,209 @@
+1. Compression algorithm (deflate)
+
+The deflation algorithm used by gzip (also zip and zlib) is a variation of
+LZ77 (Lempel-Ziv 1977, see reference below). It finds duplicated strings in
+the input data.  The second occurrence of a string is replaced by a
+pointer to the previous string, in the form of a pair (distance,
+length).  Distances are limited to 32K bytes, and lengths are limited
+to 258 bytes. When a string does not occur anywhere in the previous
+32K bytes, it is emitted as a sequence of literal bytes.  (In this
+description, `string' must be taken as an arbitrary sequence of bytes,
+and is not restricted to printable characters.)
+
+Literals or match lengths are compressed with one Huffman tree, and
+match distances are compressed with another tree. The trees are stored
+in a compact form at the start of each block. The blocks can have any
+size (except that the compressed data for one block must fit in
+available memory). A block is terminated when deflate() determines that
+it would be useful to start another block with fresh trees. (This is
+somewhat similar to the behavior of LZW-based _compress_.)
+
+Duplicated strings are found using a hash table. All input strings of
+length 3 are inserted in the hash table. A hash index is computed for
+the next 3 bytes. If the hash chain for this index is not empty, all
+strings in the chain are compared with the current input string, and
+the longest match is selected.
+
+The hash chains are searched starting with the most recent strings, to
+favor small distances and thus take advantage of the Huffman encoding.
+The hash chains are singly linked. There are no deletions from the
+hash chains, the algorithm simply discards matches that are too old.
+
+To avoid a worst-case situation, very long hash chains are arbitrarily
+truncated at a certain length, determined by a runtime option (level
+parameter of deflateInit). So deflate() does not always find the longest
+possible match but generally finds a match which is long enough.
+
+deflate() also defers the selection of matches with a lazy evaluation
+mechanism. After a match of length N has been found, deflate() searches for
+a longer match at the next input byte. If a longer match is found, the
+previous match is truncated to a length of one (thus producing a single
+literal byte) and the process of lazy evaluation begins again. Otherwise,
+the original match is kept, and the next match search is attempted only N
+steps later.
+
+The lazy match evaluation is also subject to a runtime parameter. If
+the current match is long enough, deflate() reduces the search for a longer
+match, thus speeding up the whole process. If compression ratio is more
+important than speed, deflate() attempts a complete second search even if
+the first match is already long enough.
+
+The lazy match evaluation is not performed for the fastest compression
+modes (level parameter 1 to 3). For these fast modes, new strings
+are inserted in the hash table only when no match was found, or
+when the match is not too long. This degrades the compression ratio
+but saves time since there are both fewer insertions and fewer searches.
+
+
+2. Decompression algorithm (inflate)
+
+2.1 Introduction
+
+The key question is how to represent a Huffman code (or any prefix code) so
+that you can decode fast.  The most important characteristic is that shorter
+codes are much more common than longer codes, so pay attention to decoding the
+short codes fast, and let the long codes take longer to decode.
+
+inflate() sets up a first level table that covers some number of bits of
+input less than the length of the longest code.  It gets that many bits from the
+stream, and looks it up in the table.  The table will tell if the next
+code is that many bits or less and how many, and if it is, it will tell
+the value, else it will point to the next level table for which inflate()
+grabs more bits and tries to decode a longer code.
+
+How many bits to make the first lookup is a tradeoff between the time it
+takes to decode and the time it takes to build the table.  If building the
+table took no time (and if you had infinite memory), then there would only
+be a first level table to cover all the way to the longest code.  However,
+building the table ends up taking a lot longer for more bits since short
+codes are replicated many times in such a table.  What inflate() does is
+simply to make the number of bits in the first table a variable, and then
+to set that variable for the maximum speed.
+
+For inflate, which has 286 possible codes for the literal/length tree, the size
+of the first table is nine bits.  Also the distance trees have 30 possible
+values, and the size of the first table is six bits.  Note that for each of
+those cases, the table ended up one bit longer than the ``average'' code
+length, i.e. the code length of an approximately flat code which would be a
+little more than eight bits for 286 symbols and a little less than five bits
+for 30 symbols.
+
+
+2.2 More details on the inflate table lookup
+
+Ok, you want to know what this cleverly obfuscated inflate tree actually
+looks like.  You are correct that it's not a Huffman tree.  It is simply a
+lookup table for the first, let's say, nine bits of a Huffman symbol.  The
+symbol could be as short as one bit or as long as 15 bits.  If a particular
+symbol is shorter than nine bits, then that symbol's translation is duplicated
+in all those entries that start with that symbol's bits.  For example, if the
+symbol is four bits, then it's duplicated 32 times in a nine-bit table.  If a
+symbol is nine bits long, it appears in the table once.
+
+If the symbol is longer than nine bits, then that entry in the table points
+to another similar table for the remaining bits.  Again, there are duplicated
+entries as needed.  The idea is that most of the time the symbol will be short
+and there will only be one table look up.  (That's the whole idea behind data
+compression in the first place.)  For the less frequent long symbols, there
+will be two lookups.  If you had a compression method with really long
+symbols, you could have as many levels of lookups as is efficient.  For
+inflate, two is enough.
+
+So a table entry either points to another table (in which case nine bits in
+the above example are gobbled), or it contains the translation for the symbol
+and the number of bits to gobble.  Then you start again with the next
+ungobbled bit.
+
+You may wonder: why not just have one lookup table for however many bits the
+longest symbol is?  The reason is that if you do that, you end up spending
+more time filling in duplicate symbol entries than you do actually decoding.
+At least for deflate's output that generates new trees every several 10's of
+kbytes.  You can imagine that filling in a 2^15 entry table for a 15-bit code
+would take too long if you're only decoding several thousand symbols.  At the
+other extreme, you could make a new table for every bit in the code.  In fact,
+that's essentially a Huffman tree.  But then you spend too much time
+traversing the tree while decoding, even for short symbols.
+
+So the number of bits for the first lookup table is a tradeoff of the time to
+fill out the table vs. the time spent looking at the second level and above of
+the table.
+
+Here is an example, scaled down:
+
+The code being decoded, with 10 symbols, from 1 to 6 bits long:
+
+A: 0
+B: 10
+C: 1100
+D: 11010
+E: 11011
+F: 11100
+G: 11101
+H: 11110
+I: 111110
+J: 111111
+
+Let's make the first table three bits long (eight entries):
+
+000: A,1
+001: A,1
+010: A,1
+011: A,1
+100: B,2
+101: B,2
+110: -> table X (gobble 3 bits)
+111: -> table Y (gobble 3 bits)
+
+Each entry is what the bits decode as and how many bits that is, i.e. how
+many bits to gobble.  Or the entry points to another table, with the number of
+bits to gobble implicit in the size of the table.
+
+Table X is two bits long since the longest code starting with 110 is five bits
+long:
+
+00: C,1
+01: C,1
+10: D,2
+11: E,2
+
+Table Y is three bits long since the longest code starting with 111 is six
+bits long:
+
+000: F,2
+001: F,2
+010: G,2
+011: G,2
+100: H,2
+101: H,2
+110: I,3
+111: J,3
+
+So what we have here are three tables with a total of 20 entries that had to
+be constructed.  That's compared to 64 entries for a single table.  Or
+compared to 16 entries for a Huffman tree (six two entry tables and one four
+entry table).  Assuming that the code ideally represents the probability of
+the symbols, it takes on the average 1.25 lookups per symbol.  That's compared
+to one lookup for the single table, or 1.66 lookups per symbol for the
+Huffman tree.
+
+There, I think that gives you a picture of what's going on.  For inflate, the
+meaning of a particular symbol is often more than just a letter.  It can be a
+byte (a "literal"), or it can be either a length or a distance which
+indicates a base value and a number of bits to fetch after the code that is
+added to the base value.  Or it might be the special end-of-block code.  The
+data structures created in inftrees.c try to encode all that information
+compactly in the tables.
+
+
+Jean-loup Gailly        Mark Adler
+jloup@gzip.org          madler@alumni.caltech.edu
+
+
+References:
+
+[LZ77] Ziv J., Lempel A., ``A Universal Algorithm for Sequential Data
+Compression,'' IEEE Transactions on Information Theory, Vol. 23, No. 3,
+pp. 337-343.
+
+``DEFLATE Compressed Data Format Specification'' available in
+http://tools.ietf.org/html/rfc1951
diff --git a/libraries/zlib/compress.c b/libraries/zlib/compress.c
new file mode 100644
index 000000000..e2db404ab
--- /dev/null
+++ b/libraries/zlib/compress.c
@@ -0,0 +1,86 @@
+/* compress.c -- compress a memory buffer
+ * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#define ZLIB_INTERNAL
+#include "zlib.h"
+
+/* ===========================================================================
+     Compresses the source buffer into the destination buffer. The level
+   parameter has the same meaning as in deflateInit.  sourceLen is the byte
+   length of the source buffer. Upon entry, destLen is the total size of the
+   destination buffer, which must be at least compressBound(sourceLen) bytes.
+   Upon exit, destLen is the actual size of the compressed buffer.
+
+     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer,
+   Z_STREAM_ERROR if the level parameter is invalid.
+*/
+int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+    int level;
+{
+    z_stream stream;
+    int err;
+    const uInt max = (uInt)-1;  /* cap on the chunk size given to the stream at once */
+    uLong left;                 /* destination bytes not yet given to the stream */
+
+    left = *destLen;
+    *destLen = 0;               /* what the caller sees if deflateInit() fails below */
+
+    stream.zalloc = (alloc_func)0;
+    stream.zfree = (free_func)0;
+    stream.opaque = (voidpf)0;
+
+    err = deflateInit(&stream, level);
+    if (err != Z_OK) return err;
+
+    stream.next_out = dest;
+    stream.avail_out = 0;       /* zero so the first loop pass assigns a chunk */
+    stream.next_in = (z_const Bytef *)source;
+    stream.avail_in = 0;        /* likewise for the input side */
+
+    do {
+        if (stream.avail_out == 0) {    /* give the stream up to max more output bytes */
+            stream.avail_out = left > (uLong)max ? max : (uInt)left;
+            left -= stream.avail_out;
+        }
+        if (stream.avail_in == 0) {     /* give the stream up to max more input bytes */
+            stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen;
+            sourceLen -= stream.avail_in;
+        }
+        err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH);  /* Z_FINISH once no input remains outside avail_in */
+    } while (err == Z_OK);
+
+    *destLen = stream.total_out;        /* stored whether the loop ended in success or error */
+    deflateEnd(&stream);
+    return err == Z_STREAM_END ? Z_OK : err;    /* Z_STREAM_END becomes Z_OK; other codes pass through */
+}
+
+/* ===========================================================================
+     Calls compress2() with level fixed to Z_DEFAULT_COMPRESSION. */
+int ZEXPORT compress (dest, destLen, source, sourceLen)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+{
+    return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);
+}
+
+/* ===========================================================================
+     If the default memLevel or windowBits for deflateInit() is changed, then
+   this function needs to be updated.
+ */
+uLong ZEXPORT compressBound (sourceLen)
+    uLong sourceLen;
+{
+    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
+           (sourceLen >> 25) + 13;      /* sourceLen plus 1/4096, 1/16384 and 1/2^25 of it, plus 13 */
+}
diff --git a/libraries/zlib/crc32.c b/libraries/zlib/crc32.c
new file mode 100644
index 000000000..9580440c0
--- /dev/null
+++ b/libraries/zlib/crc32.c
@@ -0,0 +1,442 @@
+/* crc32.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
+ * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
+ * tables for updating the shift register in one step with three exclusive-ors
+ * instead of four steps with four exclusive-ors.  This results in about a
+ * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
+ */
+
+/* @(#) $Id$ */
+
+/*
+  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
+  protection on the static variables used to control the first-use generation
+  of the crc tables.  Therefore, if you #define DYNAMIC_CRC_TABLE, you should
+  first call get_crc_table() to initialize the tables before allowing more than
+  one thread to use crc32().
+
+  DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h.
+ */
+
+#ifdef MAKECRCH
+#  include <stdio.h>
+#  ifndef DYNAMIC_CRC_TABLE
+#    define DYNAMIC_CRC_TABLE
+#  endif /* !DYNAMIC_CRC_TABLE */
+#endif /* MAKECRCH */
+
+#include "zutil.h"      /* for STDC and FAR definitions */
+
+/* Definitions for doing the crc four data bytes at a time. */
+#if !defined(NOBYFOUR) && defined(Z_U4)
+#  define BYFOUR
+#endif
+#ifdef BYFOUR
+   local unsigned long crc32_little OF((unsigned long,
+                        const unsigned char FAR *, z_size_t));
+   local unsigned long crc32_big OF((unsigned long,
+                        const unsigned char FAR *, z_size_t));
+#  define TBLS 8
+#else
+#  define TBLS 1
+#endif /* BYFOUR */
+
+/* Local functions for crc concatenation */
+local unsigned long gf2_matrix_times OF((unsigned long *mat,
+                                         unsigned long vec));
+local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat));
+local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2));
+
+
+#ifdef DYNAMIC_CRC_TABLE
+
+local volatile int crc_table_empty = 1;
+local z_crc_t FAR crc_table[TBLS][256];
+local void make_crc_table OF((void));
+#ifdef MAKECRCH
+   local void write_table OF((FILE *, const z_crc_t FAR *));
+#endif /* MAKECRCH */
+/*
+  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
+  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
+
+  Polynomials over GF(2) are represented in binary, one bit per coefficient,
+  with the lowest powers in the most significant bit.  Then adding polynomials
+  is just exclusive-or, and multiplying a polynomial by x is a right shift by
+  one.  If we call the above polynomial p, and represent a byte as the
+  polynomial q, also with the lowest power in the most significant bit (so the
+  byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
+  where a mod b means the remainder after dividing a by b.
+
+  This calculation is done using the shift-register method of multiplying and
+  taking the remainder.  The register is initialized to zero, and for each
+  incoming bit, x^32 is added mod p to the register if the bit is a one (where
+  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
+  x (which is shifting right by one and adding x^32 mod p if the bit shifted
+  out is a one).  We start with the highest power (least significant bit) of
+  q and repeat for all eight bits of q.
+
+  The first table is simply the CRC of all possible eight bit values.  This is
+  all the information needed to generate CRCs on data a byte at a time for all
+  combinations of CRC register values and incoming bytes.  The remaining tables
+  allow for word-at-a-time CRC calculation for both big-endian and little-
+  endian machines, where a word is four bytes.
+*/
+local void make_crc_table()
+{
+    z_crc_t c;                          /* table entry being computed */
+    int n, k;                           /* n: index into p[] or a table; k: bit or table counter */
+    z_crc_t poly;                       /* polynomial exclusive-or pattern */
+    /* terms of polynomial defining this crc (except x^32): */
+    static volatile int first = 1;      /* flag to limit concurrent making */
+    static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
+
+    /* See if another task is already doing this (not thread-safe, but better
+       than nothing -- significantly reduces duration of vulnerability in
+       case the advice about DYNAMIC_CRC_TABLE is ignored) */
+    if (first) {
+        first = 0;
+
+        /* make exclusive-or pattern from polynomial (0xedb88320UL) */
+        poly = 0;
+        for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++)
+            poly |= (z_crc_t)1 << (31 - p[n]);  /* power p[n] is stored at bit 31 - p[n] */
+
+        /* generate a crc for every 8-bit value */
+        for (n = 0; n < 256; n++) {
+            c = (z_crc_t)n;
+            for (k = 0; k < 8; k++)
+                c = c & 1 ? poly ^ (c >> 1) : c >> 1;   /* shift right; xor poly if a one fell out */
+            crc_table[0][n] = c;
+        }
+
+#ifdef BYFOUR
+        /* generate crc for each value followed by one, two, and three zeros,
+           and then the byte reversal of those as well as the first table */
+        for (n = 0; n < 256; n++) {
+            c = crc_table[0][n];
+            crc_table[4][n] = ZSWAP32(c);       /* table 4 is ZSWAP32 of table 0 */
+            for (k = 1; k < 4; k++) {
+                c = crc_table[0][c & 0xff] ^ (c >> 8);
+                crc_table[k][n] = c;
+                crc_table[k + 4][n] = ZSWAP32(c);       /* tables 5..7 are ZSWAP32 of tables 1..3 */
+            }
+        }
+#endif /* BYFOUR */
+
+        crc_table_empty = 0;    /* lets any caller spinning in the else branch proceed */
+    }
+    else {      /* not first */
+        /* wait for the other guy to finish (not efficient, but rare) */
+        while (crc_table_empty)
+            ;
+    }
+
+#ifdef MAKECRCH
+    /* write out CRC tables to crc32.h */
+    {
+        FILE *out;
+
+        out = fopen("crc32.h", "w");
+        if (out == NULL) return;        /* cannot open crc32.h: skip writing, no error reported */
+        fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
+        fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
+        fprintf(out, "local const z_crc_t FAR ");
+        fprintf(out, "crc_table[TBLS][256] =\n{\n  {\n");
+        write_table(out, crc_table[0]);
+#  ifdef BYFOUR
+        fprintf(out, "#ifdef BYFOUR\n");
+        for (k = 1; k < 8; k++) {       /* tables 1..7 are emitted inside the #ifdef BYFOUR text */
+            fprintf(out, "  },\n  {\n");
+            write_table(out, crc_table[k]);
+        }
+        fprintf(out, "#endif\n");
+#  endif /* BYFOUR */
+        fprintf(out, "  }\n};\n");
+        fclose(out);
+    }
+#endif /* MAKECRCH */
+}
+
+#ifdef MAKECRCH
+local void write_table(out, table)
+    FILE *out;
+    const z_crc_t FAR *table;
+{
+    int n;                              /* index of the entry being printed */
+
+    for (n = 0; n < 256; n++)           /* five values per output line, indented four spaces */
+        fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : "    ",
+                (unsigned long)(table[n]),
+                n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", "));  /* entry 255 gets no comma */
+}
+#endif /* MAKECRCH */
+
+#else /* !DYNAMIC_CRC_TABLE */
+/* ========================================================================
+ * Tables of CRC-32s of all single-byte values, made by make_crc_table().
+ */
+#include "crc32.h"
+#endif /* DYNAMIC_CRC_TABLE */
+
+/* =========================================================================
+ * This function can be used by asm versions of crc32()
+ */
+const z_crc_t FAR * ZEXPORT get_crc_table()
+{
+#ifdef DYNAMIC_CRC_TABLE
+    if (crc_table_empty)                /* tables not built yet: build them on first use */
+        make_crc_table();
+#endif /* DYNAMIC_CRC_TABLE */
+    return (const z_crc_t FAR *)crc_table;      /* crc_table[TBLS][256] exposed as a flat pointer */
+}
+
+/* ========================================================================= */
+#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
+#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
+
+/* ========================================================================= */
+unsigned long ZEXPORT crc32_z(crc, buf, len)
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    z_size_t len;
+{
+    if (buf == Z_NULL) return 0UL;      /* no buffer: result is 0 whatever crc and len are */
+
+#ifdef DYNAMIC_CRC_TABLE
+    if (crc_table_empty)                /* build the tables on first use */
+        make_crc_table();
+#endif /* DYNAMIC_CRC_TABLE */
+
+#ifdef BYFOUR
+    if (sizeof(void *) == sizeof(ptrdiff_t)) {  /* word paths cast buf to ptrdiff_t to test alignment */
+        z_crc_t endian;
+
+        endian = 1;
+        if (*((unsigned char *)(&endian)))      /* first byte of the value 1 is nonzero: little-endian */
+            return crc32_little(crc, buf, len);
+        else
+            return crc32_big(crc, buf, len);
+    }
+#endif /* BYFOUR */
+    crc = crc ^ 0xffffffffUL;           /* byte-at-a-time path: invert on entry ... */
+    while (len >= 8) {                  /* eight DO1 steps per pass */
+        DO8;
+        len -= 8;
+    }
+    if (len) do {                       /* remaining 1..7 bytes */
+        DO1;
+    } while (--len);
+    return crc ^ 0xffffffffUL;          /* ... and invert again on exit */
+}
+
+/* ========================================================================= */
+unsigned long ZEXPORT crc32(crc, buf, len)
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    uInt len;
+{
+    return crc32_z(crc, buf, len);      /* thin wrapper: same three arguments go to crc32_z() */
+}
+
+#ifdef BYFOUR
+
+/*
+   This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit
+   integer pointer type. This violates the strict aliasing rule, where a
+   compiler can assume, for optimization purposes, that two pointers to
+   fundamentally different types won't ever point to the same memory. This can
+   manifest as a problem only if one of the pointers is written to. This code
+   only reads from those pointers. So long as this code remains isolated in
+   this compilation unit, there won't be a problem. For this reason, this code
+   should not be copied and pasted into a compilation unit in which other code
+   writes to the buffer that is passed to these routines.
+ */
+
+/* ========================================================================= */
+#define DOLIT4 c ^= *buf4++; \
+        c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
+            crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
+#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
+
+/* ========================================================================= */
+local unsigned long crc32_little(crc, buf, len)
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    z_size_t len;
+{
+    register z_crc_t c;                         /* working crc register */
+    register const z_crc_t FAR *buf4;           /* buf viewed as z_crc_t words */
+
+    c = (z_crc_t)crc;
+    c = ~c;                                     /* invert on entry */
+    while (len && ((ptrdiff_t)buf & 3)) {       /* single bytes until buf is a multiple of 4 */
+        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+        len--;
+    }
+
+    buf4 = (const z_crc_t FAR *)(const void FAR *)buf;
+    while (len >= 32) {                         /* eight DOLIT4 steps (32 bytes) per pass */
+        DOLIT32;
+        len -= 32;
+    }
+    while (len >= 4) {                          /* remaining whole words */
+        DOLIT4;
+        len -= 4;
+    }
+    buf = (const unsigned char FAR *)buf4;      /* resume byte access where the word reads stopped */
+
+    if (len) do {                               /* at most three trailing bytes */
+        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+    } while (--len);
+    c = ~c;                                     /* invert again on exit */
+    return (unsigned long)c;
+}
+
+/* ========================================================================= */
+#define DOBIG4 c ^= *buf4++; \
+        c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
+            crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
+#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
+
+/* ========================================================================= */
+local unsigned long crc32_big(crc, buf, len)
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    z_size_t len;
+{
+    register z_crc_t c;                         /* working crc register, kept in ZSWAP32 form */
+    register const z_crc_t FAR *buf4;           /* buf viewed as z_crc_t words */
+
+    c = ZSWAP32((z_crc_t)crc);
+    c = ~c;                                     /* invert on entry */
+    while (len && ((ptrdiff_t)buf & 3)) {       /* single bytes until buf is a multiple of 4 */
+        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
+        len--;
+    }
+
+    buf4 = (const z_crc_t FAR *)(const void FAR *)buf;
+    while (len >= 32) {                         /* eight DOBIG4 steps (32 bytes) per pass */
+        DOBIG32;
+        len -= 32;
+    }
+    while (len >= 4) {                          /* remaining whole words */
+        DOBIG4;
+        len -= 4;
+    }
+    buf = (const unsigned char FAR *)buf4;      /* resume byte access where the word reads stopped */
+
+    if (len) do {                               /* at most three trailing bytes */
+        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
+    } while (--len);
+    c = ~c;                                     /* invert again on exit */
+    return (unsigned long)(ZSWAP32(c));         /* undo the ZSWAP32 applied on entry */
+}
+
+#endif /* BYFOUR */
+
+#define GF2_DIM 32      /* dimension of GF(2) vectors (length of CRC) */
+
+/* ========================================================================= */
+local unsigned long gf2_matrix_times(mat, vec)
+    unsigned long *mat;
+    unsigned long vec;
+{
+    unsigned long sum;          /* xor of the mat entries selected by vec */
+
+    sum = 0;
+    while (vec) {               /* stops as soon as no set bits remain in vec */
+        if (vec & 1)
+            sum ^= *mat;        /* bit i of vec selects mat[i] */
+        vec >>= 1;
+        mat++;
+    }
+    return sum;
+}
+
+/* ========================================================================= */
+local void gf2_matrix_square(square, mat)
+    unsigned long *square;
+    unsigned long *mat;
+{
+    int n;
+
+    for (n = 0; n < GF2_DIM; n++)
+        square[n] = gf2_matrix_times(mat, mat[n]);      /* entry n of square = mat applied to mat[n] */
+}
+
+/* ========================================================================= */
+local uLong crc32_combine_(crc1, crc2, len2)
+    uLong crc1;
+    uLong crc2;
+    z_off64_t len2;
+{
+    int n;
+    unsigned long row;              /* single-bit value shifted left once per odd[] entry */
+    unsigned long even[GF2_DIM];    /* even-power-of-two zeros operator */
+    unsigned long odd[GF2_DIM];     /* odd-power-of-two zeros operator */
+
+    /* degenerate case (also disallow negative lengths) */
+    if (len2 <= 0)
+        return crc1;                /* crc2 is not used on this path */
+
+    /* put operator for one zero bit in odd */
+    odd[0] = 0xedb88320UL;          /* CRC-32 polynomial */
+    row = 1;
+    for (n = 1; n < GF2_DIM; n++) {
+        odd[n] = row;               /* odd[n] = 1 << (n - 1) */
+        row <<= 1;
+    }
+
+    /* put operator for two zero bits in even */
+    gf2_matrix_square(even, odd);
+
+    /* put operator for four zero bits in odd */
+    gf2_matrix_square(odd, even);
+
+    /* apply len2 zeros to crc1 (first square will put the operator for one
+       zero byte, eight zero bits, in even) */
+    do {
+        /* apply zeros operator for this bit of len2 */
+        gf2_matrix_square(even, odd);
+        if (len2 & 1)
+            crc1 = gf2_matrix_times(even, crc1);
+        len2 >>= 1;                 /* move on to the next bit of len2 */
+
+        /* if no more bits set, then done */
+        if (len2 == 0)
+            break;
+
+        /* another iteration of the loop with odd and even swapped */
+        gf2_matrix_square(odd, even);
+        if (len2 & 1)
+            crc1 = gf2_matrix_times(odd, crc1);
+        len2 >>= 1;                 /* move on to the next bit of len2 */
+
+        /* if no more bits set, then done */
+    } while (len2 != 0);
+
+    /* return combined crc */
+    crc1 ^= crc2;
+    return crc1;
+}
+
+/* ========================================================================= */
+uLong ZEXPORT crc32_combine(crc1, crc2, len2)
+    uLong crc1;
+    uLong crc2;
+    z_off_t len2;
+{
+    return crc32_combine_(crc1, crc2, len2);    /* z_off_t len2 is passed to the z_off64_t parameter */
+}
+
+uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
+    uLong crc1;
+    uLong crc2;
+    z_off64_t len2;
+{
+    return crc32_combine_(crc1, crc2, len2);    /* len2 is already z_off64_t; forwarded as is */
+}
diff --git a/libraries/zlib/crc32.h b/libraries/zlib/crc32.h
new file mode 100644
index 000000000..9e0c77810
--- /dev/null
+++ b/libraries/zlib/crc32.h
@@ -0,0 +1,441 @@
+/* crc32.h -- tables for rapid CRC calculation
+ * Generated automatically by crc32.c
+ */
+
+local const z_crc_t FAR crc_table[TBLS][256] =
+{
+  {
+    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
+    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
+    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
+    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
+    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
+    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
+    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
+    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
+    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
+    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
+    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
+    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
+    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
+    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
+    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
+    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
+    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
+    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
+    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
+    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
+    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
+    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
+    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
+    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
+    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
+    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
+    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
+    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
+    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
+    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
+    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
+    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
+    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
+    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
+    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
+    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
+    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
+    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
+    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
+    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
+    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
+    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
+    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
+    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
+    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
+    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
+    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
+    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
+    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
+    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
+    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
+    0x2d02ef8dUL
+#ifdef BYFOUR
+  },
+  {
+    0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL,
+    0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL,
+    0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL,
+    0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL,
+    0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL,
+    0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL,
+    0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL,
+    0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL,
+    0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL,
+    0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL,
+    0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL,
+    0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL,
+    0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL,
+    0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL,
+    0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL,
+    0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL,
+    0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL,
+    0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL,
+    0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL,
+    0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL,
+    0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL,
+    0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL,
+    0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL,
+    0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL,
+    0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL,
+    0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL,
+    0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL,
+    0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL,
+    0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL,
+    0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL,
+    0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL,
+    0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL,
+    0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL,
+    0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL,
+    0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL,
+    0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL,
+    0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL,
+    0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL,
+    0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL,
+    0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL,
+    0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL,
+    0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL,
+    0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL,
+    0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL,
+    0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL,
+    0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL,
+    0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL,
+    0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL,
+    0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL,
+    0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL,
+    0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL,
+    0x9324fd72UL
+  },
+  {
+    0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL,
+    0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL,
+    0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL,
+    0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL,
+    0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL,
+    0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL,
+    0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL,
+    0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL,
+    0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL,
+    0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL,
+    0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL,
+    0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL,
+    0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL,
+    0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL,
+    0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL,
+    0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL,
+    0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL,
+    0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL,
+    0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL,
+    0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL,
+    0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL,
+    0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL,
+    0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL,
+    0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL,
+    0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL,
+    0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL,
+    0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL,
+    0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL,
+    0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL,
+    0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL,
+    0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL,
+    0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL,
+    0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL,
+    0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL,
+    0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL,
+    0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL,
+    0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL,
+    0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL,
+    0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL,
+    0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL,
+    0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL,
+    0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL,
+    0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL,
+    0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL,
+    0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL,
+    0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL,
+    0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL,
+    0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL,
+    0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL,
+    0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL,
+    0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL,
+    0xbe9834edUL
+  },
+  {
+    0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL,
+    0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL,
+    0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL,
+    0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL,
+    0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL,
+    0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL,
+    0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL,
+    0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL,
+    0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL,
+    0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL,
+    0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL,
+    0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL,
+    0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL,
+    0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL,
+    0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL,
+    0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL,
+    0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL,
+    0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL,
+    0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL,
+    0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL,
+    0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL,
+    0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL,
+    0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL,
+    0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL,
+    0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL,
+    0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL,
+    0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL,
+    0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL,
+    0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL,
+    0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL,
+    0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL,
+    0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL,
+    0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL,
+    0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL,
+    0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL,
+    0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL,
+    0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL,
+    0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL,
+    0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL,
+    0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL,
+    0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL,
+    0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL,
+    0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL,
+    0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL,
+    0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL,
+    0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL,
+    0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL,
+    0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL,
+    0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL,
+    0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
+    0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
+    0xde0506f1UL
+  },
+  {
+    0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL,
+    0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL,
+    0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL,
+    0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL,
+    0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL,
+    0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL,
+    0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL,
+    0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL,
+    0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL,
+    0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL,
+    0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL,
+    0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL,
+    0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL,
+    0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL,
+    0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL,
+    0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL,
+    0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL,
+    0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL,
+    0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL,
+    0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL,
+    0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL,
+    0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL,
+    0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL,
+    0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL,
+    0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL,
+    0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL,
+    0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL,
+    0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL,
+    0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL,
+    0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL,
+    0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL,
+    0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL,
+    0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL,
+    0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL,
+    0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL,
+    0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL,
+    0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL,
+    0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL,
+    0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL,
+    0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL,
+    0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL,
+    0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL,
+    0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL,
+    0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL,
+    0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL,
+    0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL,
+    0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL,
+    0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL,
+    0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL,
+    0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL,
+    0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL,
+    0x8def022dUL
+  },
+  {
+    0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL,
+    0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL,
+    0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL,
+    0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL,
+    0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL,
+    0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL,
+    0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL,
+    0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL,
+    0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL,
+    0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL,
+    0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL,
+    0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL,
+    0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL,
+    0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL,
+    0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL,
+    0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL,
+    0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL,
+    0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL,
+    0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL,
+    0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL,
+    0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL,
+    0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL,
+    0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL,
+    0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL,
+    0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL,
+    0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL,
+    0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL,
+    0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL,
+    0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL,
+    0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL,
+    0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL,
+    0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL,
+    0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL,
+    0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL,
+    0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL,
+    0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL,
+    0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL,
+    0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL,
+    0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL,
+    0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL,
+    0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL,
+    0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL,
+    0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL,
+    0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL,
+    0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL,
+    0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL,
+    0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL,
+    0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL,
+    0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL,
+    0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL,
+    0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL,
+    0x72fd2493UL
+  },
+  {
+    0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL,
+    0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL,
+    0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL,
+    0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL,
+    0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL,
+    0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL,
+    0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL,
+    0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL,
+    0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL,
+    0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL,
+    0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL,
+    0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL,
+    0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL,
+    0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL,
+    0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL,
+    0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL,
+    0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL,
+    0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL,
+    0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL,
+    0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL,
+    0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL,
+    0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL,
+    0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL,
+    0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL,
+    0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL,
+    0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL,
+    0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL,
+    0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL,
+    0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL,
+    0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL,
+    0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL,
+    0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL,
+    0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL,
+    0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL,
+    0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL,
+    0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL,
+    0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL,
+    0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL,
+    0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL,
+    0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL,
+    0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL,
+    0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL,
+    0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL,
+    0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL,
+    0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL,
+    0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL,
+    0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL,
+    0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL,
+    0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL,
+    0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL,
+    0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL,
+    0xed3498beUL
+  },
+  {
+    0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL,
+    0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL,
+    0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL,
+    0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL,
+    0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL,
+    0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL,
+    0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL,
+    0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL,
+    0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL,
+    0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL,
+    0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL,
+    0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL,
+    0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL,
+    0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL,
+    0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL,
+    0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL,
+    0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL,
+    0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL,
+    0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL,
+    0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL,
+    0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL,
+    0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL,
+    0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL,
+    0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL,
+    0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL,
+    0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL,
+    0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL,
+    0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL,
+    0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL,
+    0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL,
+    0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL,
+    0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL,
+    0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL,
+    0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL,
+    0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL,
+    0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL,
+    0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL,
+    0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL,
+    0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL,
+    0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL,
+    0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL,
+    0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL,
+    0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL,
+    0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL,
+    0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL,
+    0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL,
+    0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL,
+    0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL,
+    0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL,
+    0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL,
+    0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL,
+    0xf10605deUL
+#endif
+  }
+};
diff --git a/libraries/zlib/deflate.c b/libraries/zlib/deflate.c
new file mode 100644
index 000000000..1ec761448
--- /dev/null
+++ b/libraries/zlib/deflate.c
@@ -0,0 +1,2163 @@
+/* deflate.c -- compress data using the deflation algorithm
+ * Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ *  ALGORITHM
+ *
+ *      The "deflation" process depends on being able to identify portions
+ *      of the input text which are identical to earlier input (within a
+ *      sliding window trailing behind the input currently being processed).
+ *
+ *      The most straightforward technique turns out to be the fastest for
+ *      most input files: try all possible matches and select the longest.
+ *      The key feature of this algorithm is that insertions into the string
+ *      dictionary are very simple and thus fast, and deletions are avoided
+ *      completely. Insertions are performed at each input character, whereas
+ *      string matches are performed only when the previous match ends. So it
+ *      is preferable to spend more time in matches to allow very fast string
+ *      insertions and avoid deletions. The matching algorithm for small
+ *      strings is inspired from that of Rabin & Karp. A brute force approach
+ *      is used to find longer strings when a small match has been found.
+ *      A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
+ *      (by Leonid Broukhis).
+ *         A previous version of this file used a more sophisticated algorithm
+ *      (by Fiala and Greene) which is guaranteed to run in linear amortized
+ *      time, but has a larger average cost, uses more memory and is patented.
+ *      However the F&G algorithm may be faster for some highly redundant
+ *      files if the parameter max_chain_length (described below) is too large.
+ *
+ *  ACKNOWLEDGEMENTS
+ *
+ *      The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
+ *      I found it in 'freeze' written by Leonid Broukhis.
+ *      Thanks to many people for bug reports and testing.
+ *
+ *  REFERENCES
+ *
+ *      Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
+ *      Available in http://tools.ietf.org/html/rfc1951
+ *
+ *      A description of the Rabin and Karp algorithm is given in the book
+ *         "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
+ *
+ *      Fiala,E.R., and Greene,D.H.
+ *         Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-505
+ *
+ */
+
+/* @(#) $Id$ */
+
+#include "deflate.h"
+
+const char deflate_copyright[] =
+   " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
+/*
+  If you use the zlib library in a product, an acknowledgment is welcome
+  in the documentation of your product. If for some reason you cannot
+  include such an acknowledgment, I would appreciate that you keep this
+  copyright string in the executable of your product.
+ */
+
+/* ===========================================================================
+ *  Function prototypes.
+ */
+typedef enum {
+    need_more,      /* block not completed, need more input or more output */
+    block_done,     /* block flush performed */
+    finish_started, /* finish started, need only more output at next deflate */
+    finish_done     /* finish done, accept no more input or output */
+} block_state;
+
+typedef block_state (*compress_func) OF((deflate_state *s, int flush));
+/* Compression function. Returns the block state after the call. */
+
+local int deflateStateCheck      OF((z_streamp strm));
+local void slide_hash     OF((deflate_state *s));
+local void fill_window    OF((deflate_state *s));
+local block_state deflate_stored OF((deflate_state *s, int flush));
+local block_state deflate_fast   OF((deflate_state *s, int flush));
+#ifndef FASTEST
+local block_state deflate_slow   OF((deflate_state *s, int flush));
+#endif
+local block_state deflate_rle    OF((deflate_state *s, int flush));
+local block_state deflate_huff   OF((deflate_state *s, int flush));
+local void lm_init        OF((deflate_state *s));
+local void putShortMSB    OF((deflate_state *s, uInt b));
+local void flush_pending  OF((z_streamp strm));
+local unsigned read_buf   OF((z_streamp strm, Bytef *buf, unsigned size));
+#ifdef ASMV
+#  pragma message("Assembler code may have bugs -- use at your own risk")
+      void match_init OF((void)); /* asm code initialization */
+      uInt longest_match  OF((deflate_state *s, IPos cur_match));
+#else
+local uInt longest_match  OF((deflate_state *s, IPos cur_match));
+#endif
+
+#ifdef ZLIB_DEBUG
+local  void check_match OF((deflate_state *s, IPos start, IPos match,
+                            int length));
+#endif
+
+/* ===========================================================================
+ * Local data
+ */
+
+#define NIL 0
+/* Tail of hash chains */
+
+#ifndef TOO_FAR
+#  define TOO_FAR 4096
+#endif
+/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
+
+/* Values for max_lazy_match, good_match and max_chain_length, depending on
+ * the desired pack level (0..9). The values given below have been tuned to
+ * exclude worst case performance for pathological files. Better values may be
+ * found for specific files.
+ */
+typedef struct config_s {
+   ush good_length; /* reduce lazy search above this match length */
+   ush max_lazy;    /* do not perform lazy search above this match length */
+   ush nice_length; /* quit search above this match length */
+   ush max_chain;   /* loaded into s->max_chain_length by deflateParams() */
+   compress_func func; /* compression function used at this level */
+} config;
+
+#ifdef FASTEST
+local const config configuration_table[2] = {
+/*      good lazy nice chain */
+/* 0 */ {0,    0,  0,    0, deflate_stored},  /* store only */
+/* 1 */ {4,    4,  8,    4, deflate_fast}}; /* max speed, no lazy matches */
+#else
+local const config configuration_table[10] = {
+/*      good lazy nice chain */
+/* 0 */ {0,    0,  0,    0, deflate_stored},  /* store only */
+/* 1 */ {4,    4,  8,    4, deflate_fast}, /* max speed, no lazy matches */
+/* 2 */ {4,    5, 16,    8, deflate_fast},
+/* 3 */ {4,    6, 32,   32, deflate_fast},
+
+/* 4 */ {4,    4, 16,   16, deflate_slow},  /* lazy matches */
+/* 5 */ {8,   16, 32,   32, deflate_slow},
+/* 6 */ {8,   16, 128, 128, deflate_slow},
+/* 7 */ {8,   32, 128, 256, deflate_slow},
+/* 8 */ {32, 128, 258, 1024, deflate_slow},
+/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */
+#endif
+
+/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
+ * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
+ * meaning.
+ */
+
+/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */
+#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0))
+
+/* ===========================================================================
+ * Update a hash value with the given input byte
+ * IN  assertion: all calls to UPDATE_HASH are made with consecutive input
+ *    characters, so that a running hash key can be computed from the previous
+ *    key instead of complete recalculation each time.
+ */
+#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
+
+
+/* ===========================================================================
+ * Insert string str in the dictionary and set match_head to the previous head
+ * of the hash chain (the most recent string with same hash key). The macro
+ * expression itself evaluates to the new chain head, (Pos)(str).
+ * If this file is compiled with -DFASTEST, the compression level is forced
+ * to 1, and no hash chains are maintained.
+ * IN  assertion: all calls to INSERT_STRING are made with consecutive input
+ *    characters and the first MIN_MATCH bytes of str are valid (except for
+ *    the last MIN_MATCH-1 bytes of the input file).
+ */
+#ifdef FASTEST
+#define INSERT_STRING(s, str, match_head) \
+   (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
+    match_head = s->head[s->ins_h], \
+    s->head[s->ins_h] = (Pos)(str))
+#else
+#define INSERT_STRING(s, str, match_head) \
+   (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
+    match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \
+    s->head[s->ins_h] = (Pos)(str))
+#endif
+
+/* ===========================================================================
+ * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
+ * prev[] will be initialized on the fly.
+ */
+#define CLEAR_HASH(s) \
+    s->head[s->hash_size-1] = NIL; \
+    zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
+
+/* ===========================================================================
+ * Slide the hash table when sliding the window down (could be avoided with 32
+ * bit values at the expense of memory usage). We slide even when level == 0 to
+ * keep the hash table consistent if we switch back to level > 0 later.
+ */
+local void slide_hash(s)
+    deflate_state *s;
+{
+    uInt shift = s->w_size;      /* amount every stored position moves down */
+    unsigned remaining, pos;
+    Posf *slot;
+
+    /* Walk head[] from its last entry to its first, rebasing each one. */
+    remaining = s->hash_size;
+    slot = s->head + remaining;
+    do {
+        pos = *--slot;
+        *slot = (Pos)(pos < shift ? NIL : pos - shift);
+    } while (--remaining != 0);
+#ifndef FASTEST
+    /* Same treatment for prev[]. A slot that is on no hash chain holds
+     * garbage, but its value will never be used. */
+    remaining = shift;
+    slot = s->prev + remaining;
+    do {
+        pos = *--slot;
+        *slot = (Pos)(pos < shift ? NIL : pos - shift);
+    } while (--remaining != 0);
+#endif
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateInit_(strm, level, version, stream_size)
+    z_streamp strm;             /* stream to set up; forwarded as is */
+    int level;                  /* forwarded as is to deflateInit2_() */
+    const char *version;        /* forwarded as is to deflateInit2_() */
+    int stream_size;            /* forwarded as is to deflateInit2_() */
+{
+    return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
+                         Z_DEFAULT_STRATEGY, version, stream_size);
+    /* To do: ignore strm->next_in if we use it as window */
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
+                  version, stream_size)
+    z_streamp strm;     /* stream to initialize; receives the new state */
+    int  level;         /* 0..9, or Z_DEFAULT_COMPRESSION (mapped to 6) */
+    int  method;        /* must be Z_DEFLATED */
+    int  windowBits;    /* 8..15; negative = raw; above 15 = gzip (if GZIP) */
+    int  memLevel;      /* 1..MAX_MEM_LEVEL */
+    int  strategy;      /* 0..Z_FIXED */
+    const char *version;
+    int stream_size;    /* must equal sizeof(z_stream) */
+{
+    deflate_state *s;
+    int wrap = 1;       /* 0 = raw deflate, 1 = zlib wrapper, 2 = gzip wrapper */
+    static const char my_version[] = ZLIB_VERSION;
+
+    ushf *overlay;
+    /* We overlay pending_buf and d_buf+l_buf. This works since the average
+     * output size for (length,distance) codes is <= 24 bits.
+     */
+
+    if (version == Z_NULL || version[0] != my_version[0] ||
+        stream_size != sizeof(z_stream)) {
+        return Z_VERSION_ERROR; /* only the first version character is compared */
+    }
+    if (strm == Z_NULL) return Z_STREAM_ERROR;
+
+    strm->msg = Z_NULL;
+    if (strm->zalloc == (alloc_func)0) {
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zalloc = zcalloc;     /* no allocator supplied: use zcalloc */
+        strm->opaque = (voidpf)0;
+#endif
+    }
+    if (strm->zfree == (free_func)0)
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zfree = zcfree;       /* no free function supplied: use zcfree */
+#endif
+
+#ifdef FASTEST
+    if (level != 0) level = 1;
+#else
+    if (level == Z_DEFAULT_COMPRESSION) level = 6;
+#endif
+
+    if (windowBits < 0) { /* suppress zlib wrapper */
+        wrap = 0;
+        windowBits = -windowBits;
+    }
+#ifdef GZIP
+    else if (windowBits > 15) {
+        wrap = 2;       /* write gzip wrapper instead */
+        windowBits -= 16;
+    }
+#endif
+    if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
+        windowBits < 8 || windowBits > 15 || level < 0 || level > 9 ||
+        strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) {
+        return Z_STREAM_ERROR;
+    }
+    if (windowBits == 8) windowBits = 9;  /* until 256-byte window bug fixed */
+    s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state));
+    if (s == Z_NULL) return Z_MEM_ERROR;
+    strm->state = (struct internal_state FAR *)s;
+    s->strm = strm;             /* back pointer checked by deflateStateCheck() */
+    s->status = INIT_STATE;     /* to pass state test in deflateReset() */
+
+    s->wrap = wrap;
+    s->gzhead = Z_NULL;
+    s->w_bits = (uInt)windowBits;
+    s->w_size = 1 << s->w_bits;
+    s->w_mask = s->w_size - 1;
+
+    s->hash_bits = (uInt)memLevel + 7;
+    s->hash_size = 1 << s->hash_bits;
+    s->hash_mask = s->hash_size - 1;
+    s->hash_shift =  ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); /* rounded up */
+
+    s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
+    s->prev   = (Posf *)  ZALLOC(strm, s->w_size, sizeof(Pos));
+    s->head   = (Posf *)  ZALLOC(strm, s->hash_size, sizeof(Pos));
+
+    s->high_water = 0;      /* nothing written to s->window yet */
+
+    s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
+
+    overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
+    s->pending_buf = (uchf *) overlay;
+    s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
+
+    if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
+        s->pending_buf == Z_NULL) {
+        s->status = FINISH_STATE; /* NOTE(review): presumably for deflateEnd() */
+        strm->msg = ERR_MSG(Z_MEM_ERROR);
+        deflateEnd (strm);
+        return Z_MEM_ERROR;
+    }
+    s->d_buf = overlay + s->lit_bufsize/sizeof(ush); /* lit_bufsize bytes in */
+    s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; /* past d_buf */
+
+    s->level = level;
+    s->strategy = strategy;
+    s->method = (Byte)method;
+
+    return deflateReset(strm);
+}
+
+/* =========================================================================
+ * Check for a valid deflate stream state. Return 0 if ok, 1 if not.
+ */
+local int deflateStateCheck (strm)
+    z_streamp strm;
+{
+    deflate_state *st;
+    int known;                  /* nonzero when status is a recognized state */
+
+    if (strm == Z_NULL) return 1;
+    if (strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) return 1;
+    st = strm->state;
+    if (st == Z_NULL || st->strm != strm) return 1;  /* missing or foreign */
+
+    /* The status must be one of the known state constants. */
+    known = st->status == INIT_STATE ||
+#ifdef GZIP
+            st->status == GZIP_STATE ||
+#endif
+            st->status == EXTRA_STATE || st->status == NAME_STATE ||
+            st->status == COMMENT_STATE || st->status == HCRC_STATE ||
+            st->status == BUSY_STATE || st->status == FINISH_STATE;
+    return known ? 0 : 1;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
+    z_streamp strm;             /* stream whose window and hash are primed */
+    const Bytef *dictionary;    /* dictionary bytes; must not be Z_NULL */
+    uInt  dictLength;           /* number of bytes at dictionary */
+{
+    deflate_state *s;
+    uInt str, n;                /* position being hashed, positions left */
+    int wrap;                   /* saved s->wrap, restored before returning */
+    unsigned avail;             /* caller's strm->avail_in, restored at end */
+    z_const unsigned char *next; /* caller's strm->next_in, restored at end */
+
+    if (deflateStateCheck(strm) || dictionary == Z_NULL)
+        return Z_STREAM_ERROR;
+    s = strm->state;
+    wrap = s->wrap;
+    if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead)
+        return Z_STREAM_ERROR;  /* gzip, zlib past INIT_STATE, or lookahead */
+
+    /* when using zlib wrappers, compute Adler-32 for provided dictionary */
+    if (wrap == 1)
+        strm->adler = adler32(strm->adler, dictionary, dictLength);
+    s->wrap = 0;                    /* avoid computing Adler-32 in read_buf */
+
+    /* if dictionary would fill window, just replace the history */
+    if (dictLength >= s->w_size) {
+        if (wrap == 0) {            /* already empty otherwise */
+            CLEAR_HASH(s);
+            s->strstart = 0;
+            s->block_start = 0L;
+            s->insert = 0;
+        }
+        dictionary += dictLength - s->w_size;  /* use the tail */
+        dictLength = s->w_size;
+    }
+
+    /* insert dictionary into window and hash */
+    avail = strm->avail_in;
+    next = strm->next_in;
+    strm->avail_in = dictLength;    /* present the dictionary as the input */
+    strm->next_in = (z_const Bytef *)dictionary;
+    fill_window(s);
+    while (s->lookahead >= MIN_MATCH) {
+        str = s->strstart;
+        n = s->lookahead - (MIN_MATCH-1); /* positions with MIN_MATCH bytes */
+        do {
+            UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
+#ifndef FASTEST
+            s->prev[str & s->w_mask] = s->head[s->ins_h];
+#endif
+            s->head[s->ins_h] = (Pos)str;
+            str++;
+        } while (--n);
+        s->strstart = str;
+        s->lookahead = MIN_MATCH-1; /* the tail that was not hashed above */
+        fill_window(s);
+    }
+    s->strstart += s->lookahead;
+    s->block_start = (long)s->strstart;
+    s->insert = s->lookahead;       /* trailing bytes not entered in the hash */
+    s->lookahead = 0;
+    s->match_length = s->prev_length = MIN_MATCH-1;
+    s->match_available = 0;
+    strm->next_in = next;
+    strm->avail_in = avail;
+    s->wrap = wrap;
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateGetDictionary (strm, dictionary, dictLength)
+    z_streamp strm;
+    Bytef *dictionary;
+    uInt  *dictLength;
+{
+    deflate_state *state;
+    uInt filled, count;
+
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+
+    state = strm->state;
+    filled = state->strstart + state->lookahead; /* end of data in window */
+    count = filled > state->w_size ? state->w_size : filled;
+    if (dictionary != Z_NULL && count != 0) {
+        /* hand back the count most recent bytes of the window */
+        zmemcpy(dictionary, state->window + filled - count, count);
+    }
+    if (dictLength != Z_NULL) *dictLength = count;
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateResetKeep (strm)
+    z_streamp strm;
+{
+    deflate_state *state;
+
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+
+    /* Stream-level totals, message and data type start over. */
+    strm->total_out = 0;
+    strm->total_in = 0;
+    strm->data_type = Z_UNKNOWN;
+    strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */
+
+    /* Nothing is pending; the next output byte goes to the buffer start. */
+    state = (deflate_state *)strm->state;
+    state->pending_out = state->pending_buf;
+    state->pending = 0;
+    state->last_flush = Z_NO_FLUSH;
+
+    /* deflate(..., Z_FINISH) makes wrap negative; undo that here. */
+    if (state->wrap < 0) state->wrap = -state->wrap;
+
+    /* Starting state and initial check value depend on the wrapper. */
+#ifdef GZIP
+    if (state->wrap == 2) {
+        state->status = GZIP_STATE;
+        strm->adler = crc32(0L, Z_NULL, 0);
+    } else
+#endif
+    {
+        state->status = state->wrap ? INIT_STATE : BUSY_STATE;
+        strm->adler = adler32(0L, Z_NULL, 0);
+    }
+    _tr_init(state);
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateReset (strm)
+    z_streamp strm;
+{
+    int status = deflateResetKeep(strm);
+
+    /* lm_init() runs only after deflateResetKeep() has succeeded. */
+    if (status != Z_OK) return status;
+    lm_init(strm->state);
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateSetHeader (strm, head)
+    z_streamp strm;
+    gz_headerp head;
+{
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+    if (strm->state->wrap != 2) return Z_STREAM_ERROR;  /* gzip streams only */
+    strm->state->gzhead = head;
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflatePending (strm, pending, bits)
+    z_streamp strm;
+    unsigned *pending;
+    int *bits;
+{
+    deflate_state *state;
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = strm->state;
+    if (pending != Z_NULL) *pending = state->pending;  /* optional output */
+    if (bits != Z_NULL) *bits = state->bi_valid;       /* optional output */
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflatePrime (strm, bits, value)
+    z_streamp strm;
+    int bits;                   /* number of low bits of value to insert */
+    int value;
+{
+    deflate_state *s;
+    int put;                    /* bits moved into bi_buf this iteration */
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+    s = strm->state;
+    /* Out-of-range counts give undefined shifts and outrun the room check. */
+    if (bits < 0 || bits > Buf_size ||
+        (Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3))
+        return Z_BUF_ERROR;
+    do {
+        put = Buf_size - s->bi_valid;
+        if (put > bits) put = bits;
+        s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid);
+        s->bi_valid += put;
+        _tr_flush_bits(s);
+        value >>= put;
+        bits -= put;
+    } while (bits);
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateParams(strm, level, strategy)
+    z_streamp strm;         /* stream whose level and strategy are changed */
+    int level;              /* 0..9, or Z_DEFAULT_COMPRESSION (mapped to 6) */
+    int strategy;           /* 0..Z_FIXED */
+{
+    deflate_state *s;
+    compress_func func;     /* compression function of the current level */
+
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+    s = strm->state;
+
+#ifdef FASTEST
+    if (level != 0) level = 1;
+#else
+    if (level == Z_DEFAULT_COMPRESSION) level = 6;
+#endif
+    if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED)
+        return Z_STREAM_ERROR;
+    func = configuration_table[s->level].func;
+
+    if ((strategy != s->strategy || func != configuration_table[level].func) &&
+        s->high_water) {
+        /* Flush the last buffer: */
+        int err = deflate(strm, Z_BLOCK);
+        if (err == Z_STREAM_ERROR) return err;
+        if (strm->avail_out == 0) return Z_BUF_ERROR;
+    }
+    if (s->level != level) {
+        if (s->level == 0 && s->matches != 0) {
+            /* Braced so a multi-statement CLEAR_HASH stays in the else arm;
+             * unbraced, its second statement would run unconditionally. */
+            if (s->matches == 1) {
+                slide_hash(s);
+            } else {
+                CLEAR_HASH(s);
+            }
+            s->matches = 0;
+        }
+        s->level = level;
+        s->max_lazy_match   = configuration_table[level].max_lazy;
+        s->good_match       = configuration_table[level].good_length;
+        s->nice_match       = configuration_table[level].nice_length;
+        s->max_chain_length = configuration_table[level].max_chain;
+    }
+    s->strategy = strategy;
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain)
+    z_streamp strm;
+    int good_length;
+    int max_lazy;
+    int nice_length;
+    int max_chain;
+{
+    deflate_state *state;
+    /* Overwrite the four matcher tunables; no range checking is done here. */
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = strm->state;
+    state->max_chain_length = (uInt)max_chain;
+    state->nice_match = nice_length;
+    state->max_lazy_match = (uInt)max_lazy;
+    state->good_match = (uInt)good_length;
+    return Z_OK;
+}
+
+/* =========================================================================
+ * For the default windowBits of 15 and memLevel of 8, this function returns
+ * a close to exact, as well as small, upper bound on the compressed size.
+ * They are coded as constants here for a reason--if the #define's are
+ * changed, then this function needs to be changed as well.  The return
+ * value for 15 and 8 only works for those exact settings.
+ *
+ * For any setting other than those defaults for windowBits and memLevel,
+ * the value returned is a conservative worst case for the maximum expansion
+ * resulting from using fixed blocks instead of stored blocks, which deflate
+ * can emit on compressed data for some combinations of the parameters.
+ *
+ * This function could be more sophisticated to provide closer upper bounds for
+ * every combination of windowBits and memLevel.  But even the conservative
+ * upper bound of about 14% expansion does not seem onerous for output buffer
+ * allocation.
+ */
+uLong ZEXPORT deflateBound(strm, sourceLen)
+    z_streamp strm;             /* may be invalid; a generic bound is used */
+    uLong sourceLen;            /* input length the bound is computed for */
+{
+    deflate_state *s;
+    uLong complen, wraplen;     /* generic data bound, wrapper overhead */
+
+    /* conservative upper bound for compressed data */
+    complen = sourceLen +
+              ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5;
+
+    /* if can't get parameters, return conservative bound plus zlib wrapper */
+    if (deflateStateCheck(strm))
+        return complen + 6;
+
+    /* compute wrapper length */
+    s = strm->state;
+    switch (s->wrap) {
+    case 0:                                 /* raw deflate */
+        wraplen = 0;
+        break;
+    case 1:                                 /* zlib wrapper */
+        wraplen = 6 + (s->strstart ? 4 : 0); /* +4: preset dictionary id */
+        break;
+#ifdef GZIP
+    case 2:                                 /* gzip wrapper */
+        wraplen = 18;                       /* 10-byte header + 8-byte trailer */
+        if (s->gzhead != Z_NULL) {          /* user-supplied gzip header */
+            Bytef *str;
+            if (s->gzhead->extra != Z_NULL)
+                wraplen += 2 + s->gzhead->extra_len; /* length field + data */
+            str = s->gzhead->name;
+            if (str != Z_NULL)
+                do {
+                    wraplen++;
+                } while (*str++);           /* counts the terminating zero too */
+            str = s->gzhead->comment;
+            if (str != Z_NULL)
+                do {
+                    wraplen++;
+                } while (*str++);           /* counts the terminating zero too */
+            if (s->gzhead->hcrc)
+                wraplen += 2;               /* header CRC field */
+        }
+        break;
+#endif
+    default:                                /* for compiler happiness */
+        wraplen = 6;
+    }
+
+    /* if not default parameters, return conservative bound */
+    if (s->w_bits != 15 || s->hash_bits != 8 + 7) /* hash_bits = memLevel+7 */
+        return complen + wraplen;
+
+    /* default settings: return tight bound for that case */
+    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
+           (sourceLen >> 25) + 13 - 6 + wraplen;
+}
+
+/* =========================================================================
+ * Put a short in the pending buffer. The 16-bit value is put in MSB order.
+ * IN assertion: the stream state is correct and there is enough room in
+ * pending_buf.
+ */
+local void putShortMSB (s, b)
+    deflate_state *s;
+    uInt b;
+{
+    put_byte(s, (Byte)((b >> 8) & 0xff));   /* high-order byte goes first */
+    put_byte(s, (Byte)b);                   /* then the low-order byte */
+}
+
+/* =========================================================================
+ * Flush as much pending output as possible. All deflate() output, except for
+ * some deflate_stored() output, goes through this function so some
+ * applications may wish to modify it to avoid allocating a large
+ * strm->next_out buffer and copying into it. (See also read_buf()).
+ */
+local void flush_pending(strm)
+    z_streamp strm;
+{
+    deflate_state *state = strm->state;
+    unsigned chunk;             /* bytes moved to the output on this call */
+
+    _tr_flush_bits(state);
+    chunk = state->pending;
+    if (chunk > strm->avail_out)
+        chunk = strm->avail_out;
+    if (chunk != 0) {
+        zmemcpy(strm->next_out, state->pending_out, chunk);
+        strm->next_out += chunk;
+        strm->avail_out -= chunk;
+        strm->total_out += chunk;
+        state->pending_out += chunk;
+        state->pending -= chunk;
+        /* Buffer drained: start filling it from the beginning again. */
+        if (state->pending == 0) state->pending_out = state->pending_buf;
+    }
+}
+
+/* ===========================================================================
+ * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1].
+ * No-op unless gzhead->hcrc is set; uses the enclosing locals s and strm. */
+#define HCRC_UPDATE(beg) \
+    do { \
+        if (s->gzhead->hcrc && s->pending > (beg)) \
+            strm->adler = crc32(strm->adler, s->pending_buf + (beg), \
+                                s->pending - (beg)); \
+    } while (0)
+
+/* ========================================================================= */
+int ZEXPORT deflate (strm, flush)
+    z_streamp strm;
+    int flush;
+{
+    int old_flush; /* value of flush param for previous deflate call */
+    deflate_state *s;
+
+    if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) {
+        return Z_STREAM_ERROR;
+    }
+    s = strm->state;
+
+    if (strm->next_out == Z_NULL ||
+        (strm->avail_in != 0 && strm->next_in == Z_NULL) ||
+        (s->status == FINISH_STATE && flush != Z_FINISH)) {
+        ERR_RETURN(strm, Z_STREAM_ERROR);
+    }
+    if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
+
+    old_flush = s->last_flush;
+    s->last_flush = flush;
+
+    /* Flush as much pending output as possible */
+    if (s->pending != 0) {
+        flush_pending(strm);
+        if (strm->avail_out == 0) {
+            /* Since avail_out is 0, deflate will be called again with
+             * more output space, but possibly with both pending and
+             * avail_in equal to zero. There won't be anything to do,
+             * but this is not an error situation so make sure we
+             * return OK instead of BUF_ERROR at next call of deflate:
+             */
+            s->last_flush = -1;
+            return Z_OK;
+        }
+
+    /* Make sure there is something to do and avoid duplicate consecutive
+     * flushes. For repeated and useless calls with Z_FINISH, we keep
+     * returning Z_STREAM_END instead of Z_BUF_ERROR.
+     */
+    } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) &&
+               flush != Z_FINISH) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* User must not provide more input after the first FINISH: */
+    if (s->status == FINISH_STATE && strm->avail_in != 0) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* Write the header */
+    if (s->status == INIT_STATE) {
+        /* zlib header */
+        uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
+        uInt level_flags;
+
+        if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2)
+            level_flags = 0;
+        else if (s->level < 6)
+            level_flags = 1;
+        else if (s->level == 6)
+            level_flags = 2;
+        else
+            level_flags = 3;
+        header |= (level_flags << 6);
+        if (s->strstart != 0) header |= PRESET_DICT;
+        header += 31 - (header % 31);
+
+        putShortMSB(s, header);
+
+        /* Save the adler32 of the preset dictionary: */
+        if (s->strstart != 0) {
+            putShortMSB(s, (uInt)(strm->adler >> 16));
+            putShortMSB(s, (uInt)(strm->adler & 0xffff));
+        }
+        strm->adler = adler32(0L, Z_NULL, 0);
+        s->status = BUSY_STATE;
+
+        /* Compression must start with an empty pending buffer */
+        flush_pending(strm);
+        if (s->pending != 0) {
+            s->last_flush = -1;
+            return Z_OK;
+        }
+    }
+#ifdef GZIP
+    if (s->status == GZIP_STATE) {
+        /* gzip header */
+        strm->adler = crc32(0L, Z_NULL, 0);
+        put_byte(s, 31);
+        put_byte(s, 139);
+        put_byte(s, 8);
+        if (s->gzhead == Z_NULL) {
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, s->level == 9 ? 2 :
+                     (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
+                      4 : 0));
+            put_byte(s, OS_CODE);
+            s->status = BUSY_STATE;
+
+            /* Compression must start with an empty pending buffer */
+            flush_pending(strm);
+            if (s->pending != 0) {
+                s->last_flush = -1;
+                return Z_OK;
+            }
+        }
+        else {
+            put_byte(s, (s->gzhead->text ? 1 : 0) +
+                     (s->gzhead->hcrc ? 2 : 0) +
+                     (s->gzhead->extra == Z_NULL ? 0 : 4) +
+                     (s->gzhead->name == Z_NULL ? 0 : 8) +
+                     (s->gzhead->comment == Z_NULL ? 0 : 16)
+                     );
+            put_byte(s, (Byte)(s->gzhead->time & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff));
+            put_byte(s, s->level == 9 ? 2 :
+                     (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
+                      4 : 0));
+            put_byte(s, s->gzhead->os & 0xff);
+            if (s->gzhead->extra != Z_NULL) {
+                put_byte(s, s->gzhead->extra_len & 0xff);
+                put_byte(s, (s->gzhead->extra_len >> 8) & 0xff);
+            }
+            if (s->gzhead->hcrc)
+                strm->adler = crc32(strm->adler, s->pending_buf,
+                                    s->pending);
+            s->gzindex = 0;
+            s->status = EXTRA_STATE;
+        }
+    }
+    if (s->status == EXTRA_STATE) {
+        if (s->gzhead->extra != Z_NULL) {
+            ulg beg = s->pending;   /* start of bytes to update crc */
+            uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex;
+            while (s->pending + left > s->pending_buf_size) {
+                uInt copy = s->pending_buf_size - s->pending;
+                zmemcpy(s->pending_buf + s->pending,
+                        s->gzhead->extra + s->gzindex, copy);
+                s->pending = s->pending_buf_size;
+                HCRC_UPDATE(beg);
+                s->gzindex += copy;
+                flush_pending(strm);
+                if (s->pending != 0) {
+                    s->last_flush = -1;
+                    return Z_OK;
+                }
+                beg = 0;
+                left -= copy;
+            }
+            zmemcpy(s->pending_buf + s->pending,
+                    s->gzhead->extra + s->gzindex, left);
+            s->pending += left;
+            HCRC_UPDATE(beg);
+            s->gzindex = 0;
+        }
+        s->status = NAME_STATE;
+    }
+    if (s->status == NAME_STATE) {
+        if (s->gzhead->name != Z_NULL) {
+            ulg beg = s->pending;   /* start of bytes to update crc */
+            int val;
+            do {
+                if (s->pending == s->pending_buf_size) {
+                    HCRC_UPDATE(beg);
+                    flush_pending(strm);
+                    if (s->pending != 0) {
+                        s->last_flush = -1;
+                        return Z_OK;
+                    }
+                    beg = 0;
+                }
+                val = s->gzhead->name[s->gzindex++];
+                put_byte(s, val);
+            } while (val != 0);
+            HCRC_UPDATE(beg);
+            s->gzindex = 0;
+        }
+        s->status = COMMENT_STATE;
+    }
+    if (s->status == COMMENT_STATE) {
+        if (s->gzhead->comment != Z_NULL) {
+            ulg beg = s->pending;   /* start of bytes to update crc */
+            int val;
+            do {
+                if (s->pending == s->pending_buf_size) {
+                    HCRC_UPDATE(beg);
+                    flush_pending(strm);
+                    if (s->pending != 0) {
+                        s->last_flush = -1;
+                        return Z_OK;
+                    }
+                    beg = 0;
+                }
+                val = s->gzhead->comment[s->gzindex++];
+                put_byte(s, val);
+            } while (val != 0);
+            HCRC_UPDATE(beg);
+        }
+        s->status = HCRC_STATE;
+    }
+    if (s->status == HCRC_STATE) {
+        if (s->gzhead->hcrc) {
+            if (s->pending + 2 > s->pending_buf_size) {
+                flush_pending(strm);
+                if (s->pending != 0) {
+                    s->last_flush = -1;
+                    return Z_OK;
+                }
+            }
+            put_byte(s, (Byte)(strm->adler & 0xff));
+            put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
+            strm->adler = crc32(0L, Z_NULL, 0);
+        }
+        s->status = BUSY_STATE;
+
+        /* Compression must start with an empty pending buffer */
+        flush_pending(strm);
+        if (s->pending != 0) {
+            s->last_flush = -1;
+            return Z_OK;
+        }
+    }
+#endif
+
+    /* Start a new block or continue the current one.
+     */
+    if (strm->avail_in != 0 || s->lookahead != 0 ||
+        (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
+        block_state bstate;
+
+        bstate = s->level == 0 ? deflate_stored(s, flush) :
+                 s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
+                 s->strategy == Z_RLE ? deflate_rle(s, flush) :
+                 (*(configuration_table[s->level].func))(s, flush);
+
+        if (bstate == finish_started || bstate == finish_done) {
+            s->status = FINISH_STATE;
+        }
+        if (bstate == need_more || bstate == finish_started) {
+            if (strm->avail_out == 0) {
+                s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
+            }
+            return Z_OK;
+            /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
+             * of deflate should use the same flush parameter to make sure
+             * that the flush is complete. So we don't have to output an
+             * empty block here, this will be done at next call. This also
+             * ensures that for a very small output buffer, we emit at most
+             * one empty block.
+             */
+        }
+        if (bstate == block_done) {
+            if (flush == Z_PARTIAL_FLUSH) {
+                _tr_align(s);
+            } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */
+                _tr_stored_block(s, (char*)0, 0L, 0);
+                /* For a full flush, this empty block will be recognized
+                 * as a special marker by inflate_sync().
+                 */
+                if (flush == Z_FULL_FLUSH) {
+                    CLEAR_HASH(s);             /* forget history */
+                    if (s->lookahead == 0) {
+                        s->strstart = 0;
+                        s->block_start = 0L;
+                        s->insert = 0;
+                    }
+                }
+            }
+            flush_pending(strm);
+            if (strm->avail_out == 0) {
+              s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
+              return Z_OK;
+            }
+        }
+    }
+
+    if (flush != Z_FINISH) return Z_OK;
+    if (s->wrap <= 0) return Z_STREAM_END;
+
+    /* Write the trailer */
+#ifdef GZIP
+    if (s->wrap == 2) {
+        put_byte(s, (Byte)(strm->adler & 0xff));
+        put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
+        put_byte(s, (Byte)((strm->adler >> 16) & 0xff));
+        put_byte(s, (Byte)((strm->adler >> 24) & 0xff));
+        put_byte(s, (Byte)(strm->total_in & 0xff));
+        put_byte(s, (Byte)((strm->total_in >> 8) & 0xff));
+        put_byte(s, (Byte)((strm->total_in >> 16) & 0xff));
+        put_byte(s, (Byte)((strm->total_in >> 24) & 0xff));
+    }
+    else
+#endif
+    {
+        putShortMSB(s, (uInt)(strm->adler >> 16));
+        putShortMSB(s, (uInt)(strm->adler & 0xffff));
+    }
+    flush_pending(strm);
+    /* If avail_out is zero, the application will call deflate again
+     * to flush the rest.
+     */
+    if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */
+    return s->pending != 0 ? Z_OK : Z_STREAM_END;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateEnd (strm)   /* free everything owned by strm->state */
+    z_streamp strm;             /* stream to end; must pass deflateStateCheck */
+{
+    int status;                 /* status saved before the state is freed */
+
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+
+    status = strm->state->status;
+
+    /* Deallocate in reverse order of allocations: */
+    TRY_FREE(strm, strm->state->pending_buf);
+    TRY_FREE(strm, strm->state->head);
+    TRY_FREE(strm, strm->state->prev);
+    TRY_FREE(strm, strm->state->window);
+
+    ZFREE(strm, strm->state);
+    strm->state = Z_NULL;       /* do not leave a dangling state pointer */
+    /* Ending a stream whose status was BUSY_STATE yields Z_DATA_ERROR. */
+    return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
+}
+
+/* =========================================================================
+ * Copy the source state to the destination state.
+ * To simplify the source, this is not supported for 16-bit MSDOS (which
+ * doesn't have enough memory anyway to duplicate compression states).
+ */
+int ZEXPORT deflateCopy (dest, source)
+    z_streamp dest;             /* receives the copy; must not be Z_NULL */
+    z_streamp source;           /* must pass deflateStateCheck() */
+{
+#ifdef MAXSEG_64K
+    return Z_STREAM_ERROR;
+#else
+    deflate_state *ds;          /* newly allocated destination state */
+    deflate_state *ss;          /* source state being duplicated */
+    ushf *overlay;              /* allocation that backs ds->pending_buf */
+
+    /* Validate both arguments before anything in dest is written. */
+    if (deflateStateCheck(source) || dest == Z_NULL) {
+        return Z_STREAM_ERROR;
+    }
+
+    ss = source->state;
+    /* Shallow-copy the stream; dest->state is replaced just below. */
+    zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
+
+    ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state));
+    if (ds == Z_NULL) return Z_MEM_ERROR;
+    dest->state = (struct internal_state FAR *) ds;
+    zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state));
+    ds->strm = dest;            /* back-pointer must name the new stream */
+
+    ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
+    ds->prev   = (Posf *)  ZALLOC(dest, ds->w_size, sizeof(Pos));
+    ds->head   = (Posf *)  ZALLOC(dest, ds->hash_size, sizeof(Pos));
+    overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
+    ds->pending_buf = (uchf *) overlay;
+
+    if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
+        ds->pending_buf == Z_NULL) {
+        deflateEnd (dest);      /* releases whatever was allocated above */
+        return Z_MEM_ERROR;
+    }
+    /* The following zmemcpy calls do not work for 16-bit MSDOS. */
+    zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
+    zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos));
+    zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos));
+    zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
+    /* Rebase the pointers that were copied from the source state. */
+    ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
+    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
+    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
+    /* Point the tree descriptors at the copy's own tree arrays. */
+    ds->l_desc.dyn_tree = ds->dyn_ltree;
+    ds->d_desc.dyn_tree = ds->dyn_dtree;
+    ds->bl_desc.dyn_tree = ds->bl_tree;
+
+    return Z_OK;
+#endif /* MAXSEG_64K */
+}
+
+/* ===========================================================================
+ * Read a new buffer from the current input stream, update the adler32
+ * and total number of bytes read.  All deflate() input goes through
+ * this function so some applications may wish to modify it to avoid
+ * allocating a large strm->next_in buffer and copying from it.
+ * (See also flush_pending()).
+ */
+local unsigned read_buf(strm, buf, size)
+    z_streamp strm;             /* input stream; its counters are advanced */
+    Bytef *buf;                 /* destination for the copied bytes */
+    unsigned size;              /* maximum number of bytes to copy */
+{
+    unsigned len = strm->avail_in;
+
+    if (len > size) len = size; /* copy min(avail_in, size) bytes */
+    if (len == 0) return 0;     /* nothing to copy: strm is left untouched */
+
+    strm->avail_in  -= len;
+
+    zmemcpy(buf, strm->next_in, len);
+    if (strm->state->wrap == 1) {       /* wrap 1: update with adler32() */
+        strm->adler = adler32(strm->adler, buf, len);
+    }
+#ifdef GZIP
+    else if (strm->state->wrap == 2) {  /* wrap 2: update with crc32() */
+        strm->adler = crc32(strm->adler, buf, len);
+    }
+#endif
+    strm->next_in  += len;
+    strm->total_in += len;
+
+    return len;                 /* number of bytes actually copied */
+}
+
+/* ===========================================================================
+ * Initialize the "longest match" routines for a new zlib stream
+ */
+local void lm_init (s)
+    deflate_state *s;           /* state to reset; w_size and level are read */
+{
+    s->window_size = (ulg)2L*s->w_size;     /* twice w_size */
+
+    CLEAR_HASH(s);
+
+    /* Load the match-search tuning parameters from the configuration_table
+     * entry selected by s->level: */
+    s->max_lazy_match   = configuration_table[s->level].max_lazy;
+    s->good_match       = configuration_table[s->level].good_length;
+    s->nice_match       = configuration_table[s->level].nice_length;
+    s->max_chain_length = configuration_table[s->level].max_chain;
+
+    s->strstart = 0;            /* scanning restarts at window offset 0 */
+    s->block_start = 0L;
+    s->lookahead = 0;           /* no input is buffered yet */
+    s->insert = 0;
+    s->match_length = s->prev_length = MIN_MATCH-1; /* below minimum match */
+    s->match_available = 0;
+    s->ins_h = 0;
+#ifndef FASTEST
+#ifdef ASMV
+    match_init(); /* initialize the asm code */
+#endif
+#endif
+}
+
+#ifndef FASTEST
+/* ===========================================================================
+ * Set match_start to the longest match starting at the given string and
+ * return its length. Matches shorter or equal to prev_length are discarded,
+ * in which case the result is equal to prev_length and match_start is
+ * garbage.
+ * IN assertions: cur_match is the head of the hash chain for the current
+ *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
+ * OUT assertion: the match length is not greater than s->lookahead.
+ */
+#ifndef ASMV
+/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
+ * match.S. The code will be functionally equivalent.
+ */
+local uInt longest_match(s, cur_match)
+    deflate_state *s;           /* compressor state; s->match_start is set */
+    IPos cur_match;                             /* first chain entry to try */
+{
+    unsigned chain_length = s->max_chain_length;/* max hash chain length */
+    register Bytef *scan = s->window + s->strstart; /* current string */
+    register Bytef *match;                      /* matched string */
+    register int len;                           /* length of current match */
+    int best_len = (int)s->prev_length;         /* best match length so far */
+    int nice_match = s->nice_match;             /* stop if match long enough */
+    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+        s->strstart - (IPos)MAX_DIST(s) : NIL;
+    /* Stop when cur_match becomes <= limit. To simplify the code,
+     * we prevent matches with the string of window index 0.
+     */
+    Posf *prev = s->prev;       /* chain links, indexed by position & wmask */
+    uInt wmask = s->w_mask;
+
+#ifdef UNALIGNED_OK
+    /* Compare two bytes at a time. Note: this is not always beneficial.
+     * Try with and without -DUNALIGNED_OK to check.
+     */
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
+    register ush scan_start = *(ushf*)scan;
+    register ush scan_end   = *(ushf*)(scan+best_len-1);
+#else
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+    register Byte scan_end1  = scan[best_len-1];
+    register Byte scan_end   = scan[best_len];
+#endif
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    /* Do not waste too much time if we already have a good match: */
+    if (s->prev_length >= s->good_match) {
+        chain_length >>= 2;     /* search only a quarter of the chain */
+    }
+    /* Do not look for matches beyond the end of the input. This is necessary
+     * to make deflate deterministic.
+     */
+    if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead;
+
+    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    do {
+        Assert(cur_match < s->strstart, "no future");
+        match = s->window + cur_match;
+
+        /* Skip to next match if the match length cannot increase
+         * or if the match length is less than 2.  Note that the checks below
+         * for insufficient lookahead only occur occasionally for performance
+         * reasons.  Therefore uninitialized memory will be accessed, and
+         * conditional jumps will be made that depend on those values.
+         * However the length of the match is limited to the lookahead, so
+         * the output of deflate is not affected by the uninitialized values.
+         */
+#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
+        /* This code assumes sizeof(unsigned short) == 2. Do not use
+         * UNALIGNED_OK if your compiler uses a different size.
+         */
+        if (*(ushf*)(match+best_len-1) != scan_end ||
+            *(ushf*)match != scan_start) continue;
+
+        /* It is not necessary to compare scan[2] and match[2] since they are
+         * always equal when the other bytes match, given that the hash keys
+         * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
+         * strstart+3, +5, ... up to strstart+257. We check for insufficient
+         * lookahead only every 4th comparison; the 128th check will be made
+         * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
+         * necessary to put more guard bytes at the end of the window, or
+         * to check more often for insufficient lookahead.
+         */
+        Assert(scan[2] == match[2], "scan[2]?");
+        scan++, match++;
+        do {
+        } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 scan < strend);
+        /* The funny "do {}" generates better code on most compilers */
+
+        /* Here, scan <= window+strstart+257 */
+        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+        if (*scan == *match) scan++;
+
+        len = (MAX_MATCH - 1) - (int)(strend-scan);
+        scan = strend - (MAX_MATCH-1);  /* rewind scan to the string start */
+
+#else /* UNALIGNED_OK */
+
+        if (match[best_len]   != scan_end  ||
+            match[best_len-1] != scan_end1 ||
+            *match            != *scan     ||
+            *++match          != scan[1])      continue;
+
+        /* The check at best_len-1 can be removed because it will be made
+         * again later. (This heuristic is not always a win.)
+         * It is not necessary to compare scan[2] and match[2] since they
+         * are always equal when the other bytes match, given that
+         * the hash keys are equal and that HASH_BITS >= 8.
+         */
+        scan += 2, match++;
+        Assert(*scan == *match, "match[2]?");
+
+        /* We check for insufficient lookahead only every 8th comparison;
+         * the 256th check will be made at strstart+258.
+         */
+        do {
+        } while (*++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 scan < strend);
+
+        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+        len = MAX_MATCH - (int)(strend - scan);
+        scan = strend - MAX_MATCH;      /* rewind scan to the string start */
+
+#endif /* UNALIGNED_OK */
+
+        if (len > best_len) {   /* only a strictly longer match is kept */
+            s->match_start = cur_match;
+            best_len = len;
+            if (len >= nice_match) break;
+#ifdef UNALIGNED_OK
+            scan_end = *(ushf*)(scan+best_len-1);
+#else
+            scan_end1  = scan[best_len-1];
+            scan_end   = scan[best_len];
+#endif
+        }
+    } while ((cur_match = prev[cur_match & wmask]) > limit
+             && --chain_length != 0);
+    /* The returned length is capped at the available lookahead. */
+    if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
+    return s->lookahead;
+}
+#endif /* ASMV */
+
+#else /* FASTEST */
+
+/* ---------------------------------------------------------------------------
+ * Optimized version for FASTEST only
+ */
+local uInt longest_match(s, cur_match)
+    deflate_state *s;           /* compressor state; s->match_start is set */
+    IPos cur_match;                             /* the one candidate tried */
+{
+    register Bytef *scan = s->window + s->strstart; /* current string */
+    register Bytef *match;                       /* matched string */
+    register int len;                           /* length of current match */
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    Assert(cur_match < s->strstart, "no future");
+
+    match = s->window + cur_match;
+
+    /* Return failure if the match length is less than 2:
+     */
+    if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1;
+
+    /* The check at best_len-1 can be removed because it will be made
+     * again later. (This heuristic is not always a win.)
+     * It is not necessary to compare scan[2] and match[2] since they
+     * are always equal when the other bytes match, given that
+     * the hash keys are equal and that HASH_BITS >= 8.
+     */
+    scan += 2, match += 2;
+    Assert(*scan == *match, "match[2]?");
+
+    /* We check for insufficient lookahead only every 8th comparison;
+     * the 256th check will be made at strstart+258.
+     */
+    do {
+    } while (*++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             scan < strend);
+
+    Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+    len = MAX_MATCH - (int)(strend - scan);
+
+    if (len < MIN_MATCH) return MIN_MATCH - 1;  /* too short: report failure */
+
+    s->match_start = cur_match;
+    return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; /* capped */
+}
+
+#endif /* FASTEST */
+
+#ifdef ZLIB_DEBUG
+
+#define EQUAL 0
+/* result of memcmp for equal strings */
+
+/* ===========================================================================
+ * Check that the match at match_start is indeed a match.
+ */
+local void check_match(s, start, match, length)
+    deflate_state *s;           /* only s->window is read */
+    IPos start, match;          /* window offsets of the two byte ranges */
+    int length;                 /* number of bytes expected to be equal */
+{
+    /* Report the mismatch and call z_error() if the two ranges differ. */
+    if (zmemcmp(s->window + match,
+                s->window + start, length) != EQUAL) {
+        fprintf(stderr, " start %u, match %u, length %d\n",
+                start, match, length);
+        do {    /* dump both ranges, one byte of each at a time */
+            fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
+        } while (--length != 0);
+        z_error("invalid match");
+    }
+    if (z_verbose > 1) {        /* trace as \[start-match,length] + bytes */
+        fprintf(stderr,"\\[%d,%d]", start-match, length);
+        do { putc(s->window[start++], stderr); } while (--length != 0);
+    }
+}
+#else
+#  define check_match(s, start, match, length)
+#endif /* ZLIB_DEBUG */
+
+/* ===========================================================================
+ * Fill the window when the lookahead becomes insufficient.
+ * Updates strstart and lookahead.
+ *
+ * IN assertion: lookahead < MIN_LOOKAHEAD
+ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
+ *    At least one byte has been read, or avail_in == 0; reads are
+ *    performed for at least two bytes (required for the zip translate_eol
+ *    option -- not supported here).
+ */
+local void fill_window(s)
+    deflate_state *s;           /* state whose window and lookahead grow */
+{
+    unsigned n;       /* Number of bytes returned by the latest read_buf(). */
+    unsigned more;    /* Amount of free space at the end of the window. */
+    uInt wsize = s->w_size;
+
+    Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
+
+    do {
+        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
+
+        /* Deal with !@#$% 64K limit: */
+        if (sizeof(int) <= 2) {
+            if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
+                more = wsize;
+
+            } else if (more == (unsigned)(-1)) {
+                /* Very unlikely, but possible on 16 bit machine if
+                 * strstart == 0 && lookahead == 1 (input done a byte at time)
+                 */
+                more--;
+            }
+        }
+
+        /* If the window is almost full and there is insufficient lookahead,
+         * move the upper half to the lower one to make room in the upper half.
+         */
+        if (s->strstart >= wsize+MAX_DIST(s)) {
+
+            zmemcpy(s->window, s->window+wsize, (unsigned)wsize - more);
+            s->match_start -= wsize;
+            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
+            s->block_start -= (long) wsize;
+            slide_hash(s);
+            more += wsize;      /* the slide freed wsize bytes at the end */
+        }
+        if (s->strm->avail_in == 0) break;      /* no input left to read */
+
+        /* If there was no sliding:
+         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
+         *    more == window_size - lookahead - strstart
+         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
+         * => more >= window_size - 2*WSIZE + 2
+         * In the BIG_MEM or MMAP case (not yet supported),
+         *   window_size == input_size + MIN_LOOKAHEAD  &&
+         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
+         * Otherwise, window_size == 2*WSIZE so more >= 2.
+         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
+         */
+        Assert(more >= 2, "more < 2");
+
+        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
+        s->lookahead += n;      /* new bytes land right after the lookahead */
+
+        /* Initialize the hash value now that we have some input: */
+        if (s->lookahead + s->insert >= MIN_MATCH) {
+            uInt str = s->strstart - s->insert;
+            s->ins_h = s->window[str];
+            UPDATE_HASH(s, s->ins_h, s->window[str + 1]);
+#if MIN_MATCH != 3
+            Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+            while (s->insert) { /* enter the s->insert strings before strstart */
+                UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
+#ifndef FASTEST
+                s->prev[str & s->w_mask] = s->head[s->ins_h];
+#endif
+                s->head[s->ins_h] = (Pos)str;
+                str++;
+                s->insert--;
+                if (s->lookahead + s->insert < MIN_MATCH)
+                    break;      /* fewer than MIN_MATCH bytes remain to hash */
+            }
+        }
+        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
+         * but this is not important since only literal bytes will be emitted.
+         */
+
+    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
+
+    /* If the WIN_INIT bytes after the end of the current data have never been
+     * written, then zero those bytes in order to avoid memory check reports of
+     * the use of uninitialized (or uninitialised as Julian writes) bytes by
+     * the longest match routines.  Update the high water mark for the next
+     * time through here.  WIN_INIT is set to MAX_MATCH since the longest match
+     * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
+     */
+    if (s->high_water < s->window_size) {
+        ulg curr = s->strstart + (ulg)(s->lookahead);   /* end of valid data */
+        ulg init;                                       /* bytes to zero */
+
+        if (s->high_water < curr) {
+            /* Previous high water mark below current data -- zero WIN_INIT
+             * bytes or up to end of window, whichever is less.
+             */
+            init = s->window_size - curr;
+            if (init > WIN_INIT)
+                init = WIN_INIT;
+            zmemzero(s->window + curr, (unsigned)init);
+            s->high_water = curr + init;
+        }
+        else if (s->high_water < (ulg)curr + WIN_INIT) {
+            /* High water mark at or above current data, but below current data
+             * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
+             * to end of window, whichever is less.
+             */
+            init = (ulg)curr + WIN_INIT - s->high_water;
+            if (init > s->window_size - s->high_water)
+                init = s->window_size - s->high_water;
+            zmemzero(s->window + s->high_water, (unsigned)init);
+            s->high_water += init;
+        }
+    }
+
+    Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD,
+           "not enough room for search");
+}
+
+/* ===========================================================================
+ * Flush the current block, with given end-of-file flag, restart the block at
+ * strstart and call flush_pending(). Block data is Z_NULL if block_start < 0.
+ * IN assertion: strstart is set to the end of the current match. */
+#define FLUSH_BLOCK_ONLY(s, last) { \
+   _tr_flush_block(s, (s->block_start >= 0L ? \
+                   (charf *)&s->window[(unsigned)s->block_start] : \
+                   (charf *)Z_NULL), \
+                (ulg)((long)s->strstart - s->block_start), \
+                (last)); \
+   s->block_start = s->strstart; \
+   flush_pending(s->strm); \
+   Tracev((stderr,"[FLUSH]")); \
+}
+
+/* Same but force premature exit: returns from the caller if avail_out == 0. */
+#define FLUSH_BLOCK(s, last) { \
+   FLUSH_BLOCK_ONLY(s, last); \
+   if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \
+}
+
+/* Maximum stored block length in deflate format (not including header). */
+#define MAX_STORED 65535
+
+/* Minimum of a and b. The selected argument is evaluated a second time. */
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+
+/* ===========================================================================
+ * Copy without compression as much as possible from the input stream, return
+ * the current block state.
+ *
+ * In case deflateParams() is used to later switch to a non-zero compression
+ * level, s->matches (otherwise unused when storing) keeps track of the number
+ * of hash table slides to perform. If s->matches is 1, then one hash table
+ * slide will be done when switching. If s->matches is 2, the maximum value
+ * allowed here, then the hash table will be cleared, since two or more slides
+ * is the same as a clear.
+ *
+ * deflate_stored() is written to minimize the number of times an input byte is
+ * copied. It is most efficient with large input and output buffers, which
+ * maximizes the opportunities to have a single copy from next_in to next_out.
+ */
+local block_state deflate_stored(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    /* Smallest worthy block size when not flushing or finishing. By default
+     * this is 32K. This can be as small as 507 bytes for memLevel == 1. For
+     * large input and output buffers, the stored block size will be larger.
+     */
+    unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size);
+
+    /* Copy as many min_block or larger stored blocks directly to next_out as
+     * possible. If flushing, copy the remaining available input to next_out as
+     * stored blocks, if there is enough space.
+     */
+    unsigned len, left, have, last = 0;
+    unsigned used = s->strm->avail_in;
+    do {
+        /* Set len to the maximum size block that we can copy directly with the
+         * available input data and output space. Set left to how much of that
+         * would be copied from what's left in the window.
+         */
+        len = MAX_STORED;       /* maximum deflate stored block length */
+        have = (s->bi_valid + 42) >> 3;         /* number of header bytes */
+        if (s->strm->avail_out < have)          /* need room for header */
+            break;
+            /* maximum stored block length that will fit in avail_out: */
+        have = s->strm->avail_out - have;
+        left = s->strstart - s->block_start;    /* bytes left in window */
+        if (len > (ulg)left + s->strm->avail_in)
+            len = left + s->strm->avail_in;     /* limit len to the input */
+        if (len > have)
+            len = have;                         /* limit len to the output */
+
+        /* If the stored block would be less than min_block in length, or if
+         * unable to copy all of the available input when flushing, then try
+         * copying to the window and the pending buffer instead. Also don't
+         * write an empty block when flushing -- deflate() does that.
+         */
+        if (len < min_block && ((len == 0 && flush != Z_FINISH) ||
+                                flush == Z_NO_FLUSH ||
+                                len != left + s->strm->avail_in))
+            break;
+
+        /* Make a dummy stored block in pending to get the header bytes,
+         * including any pending bits. This also updates the debugging counts.
+         */
+        last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0;
+        _tr_stored_block(s, (char *)0, 0L, last);
+
+        /* Replace the lengths in the dummy stored block with len. */
+        s->pending_buf[s->pending - 4] = len;
+        s->pending_buf[s->pending - 3] = len >> 8;
+        s->pending_buf[s->pending - 2] = ~len;
+        s->pending_buf[s->pending - 1] = ~len >> 8;
+
+        /* Write the stored block header bytes. */
+        flush_pending(s->strm);
+
+#ifdef ZLIB_DEBUG
+        /* Update debugging counts for the data about to be copied. */
+        s->compressed_len += len << 3;
+        s->bits_sent += len << 3;
+#endif
+
+        /* Copy uncompressed bytes from the window to next_out. */
+        if (left) {
+            if (left > len)
+                left = len;
+            zmemcpy(s->strm->next_out, s->window + s->block_start, left);
+            s->strm->next_out += left;
+            s->strm->avail_out -= left;
+            s->strm->total_out += left;
+            s->block_start += left;
+            len -= left;
+        }
+
+        /* Copy uncompressed bytes directly from next_in to next_out, updating
+         * the check value.
+         */
+        if (len) {
+            read_buf(s->strm, s->strm->next_out, len);
+            s->strm->next_out += len;
+            s->strm->avail_out -= len;
+            s->strm->total_out += len;
+        }
+    } while (last == 0);
+
+    /* Update the sliding window with the last s->w_size bytes of the copied
+     * data, or append all of the copied data to the existing window if less
+     * than s->w_size bytes were copied. Also update the number of bytes to
+     * insert in the hash tables, in the event that deflateParams() switches to
+     * a non-zero compression level.
+     */
+    used -= s->strm->avail_in;      /* number of input bytes directly copied */
+    if (used) {
+        /* If any input was used, then no unused input remains in the window,
+         * therefore s->block_start == s->strstart.
+         */
+        if (used >= s->w_size) {    /* supplant the previous history */
+            s->matches = 2;         /* clear hash */
+            zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size);
+            s->strstart = s->w_size;
+        }
+        else {
+            if (s->window_size - s->strstart <= used) {
+                /* Slide the window down. */
+                s->strstart -= s->w_size;
+                zmemcpy(s->window, s->window + s->w_size, s->strstart);
+                if (s->matches < 2)
+                    s->matches++;   /* add a pending slide_hash() */
+            }
+            zmemcpy(s->window + s->strstart, s->strm->next_in - used, used);
+            s->strstart += used;
+        }
+        s->block_start = s->strstart;
+        s->insert += MIN(used, s->w_size - s->insert);
+    }
+    if (s->high_water < s->strstart)
+        s->high_water = s->strstart;
+
+    /* If the last block was written to next_out, then done. */
+    if (last)
+        return finish_done;
+
+    /* If flushing and all input has been consumed, then done. */
+    if (flush != Z_NO_FLUSH && flush != Z_FINISH &&
+        s->strm->avail_in == 0 && (long)s->strstart == s->block_start)
+        return block_done;
+
+    /* Fill the window with any remaining input. */
+    have = s->window_size - s->strstart - 1;
+    if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) {
+        /* Slide the window down. */
+        s->block_start -= s->w_size;
+        s->strstart -= s->w_size;
+        zmemcpy(s->window, s->window + s->w_size, s->strstart);
+        if (s->matches < 2)
+            s->matches++;           /* add a pending slide_hash() */
+        have += s->w_size;          /* more space now */
+    }
+    if (have > s->strm->avail_in)
+        have = s->strm->avail_in;
+    if (have) {
+        read_buf(s->strm, s->window + s->strstart, have);
+        s->strstart += have;
+    }
+    if (s->high_water < s->strstart)
+        s->high_water = s->strstart;
+
+    /* There was not enough avail_out to write a complete worthy or flushed
+     * stored block to next_out. Write a stored block to pending instead, if we
+     * have enough input for a worthy block, or if flushing and there is enough
+     * room for the remaining input as a stored block in the pending buffer.
+     */
+    have = (s->bi_valid + 42) >> 3;         /* number of header bytes */
+        /* maximum stored block length that will fit in pending: */
+    have = MIN(s->pending_buf_size - have, MAX_STORED);
+    min_block = MIN(have, s->w_size);
+    left = s->strstart - s->block_start;
+    if (left >= min_block ||
+        ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH &&
+         s->strm->avail_in == 0 && left <= have)) {
+        len = MIN(left, have);
+        last = flush == Z_FINISH && s->strm->avail_in == 0 &&
+               len == left ? 1 : 0;
+        _tr_stored_block(s, (charf *)s->window + s->block_start, len, last);
+        s->block_start += len;
+        flush_pending(s->strm);
+    }
+
+    /* We've done all we can with the available input and output. */
+    return last ? finish_started : need_more;
+}
+
+/* ===========================================================================
+ * Compress as much as possible from the input stream, and return the current
+ * block state (need_more, block_done or finish_done on the explicit returns).
+ * This function does not perform lazy evaluation of matches, and it inserts
+ * new strings in the dictionary only for unmatched strings or for short
+ * matches. It is used only for the fast compression options.
+ */
+local block_state deflate_fast(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    IPos hash_head;       /* head of the hash chain */
+    int bflush;           /* set if current block must be flushed */
+
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the next match, plus MIN_MATCH bytes to insert the
+         * string following the next match.
+         */
+        if (s->lookahead < MIN_LOOKAHEAD) {
+            fill_window(s);
+            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+                return need_more;
+            }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* Insert the string window[strstart .. strstart+2] in the
+         * dictionary, and set hash_head to the head of the hash chain:
+         */
+        hash_head = NIL;
+        if (s->lookahead >= MIN_MATCH) {
+            INSERT_STRING(s, s->strstart, hash_head);
+        }
+
+        /* Find the longest match, discarding those <= prev_length.
+         * At this point we always have match_length < MIN_MATCH.
+         */
+        if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
+            /* To simplify the code, we prevent matches with the string
+             * of window index 0 (in particular we have to avoid a match
+             * of the string with itself at the start of the input file).
+             */
+            s->match_length = longest_match (s, hash_head);
+            /* longest_match() sets match_start */
+        }
+        if (s->match_length >= MIN_MATCH) {
+            check_match(s, s->strstart, s->match_start, s->match_length);
+
+            _tr_tally_dist(s, s->strstart - s->match_start,
+                           s->match_length - MIN_MATCH, bflush);
+
+            s->lookahead -= s->match_length;
+
+            /* Insert new strings in the hash table only if the match length
+             * is not too large. This saves time but degrades compression.
+             */
+#ifndef FASTEST
+            if (s->match_length <= s->max_insert_length &&
+                s->lookahead >= MIN_MATCH) {
+                s->match_length--; /* string at strstart already in table */
+                do {
+                    s->strstart++;
+                    INSERT_STRING(s, s->strstart, hash_head);
+                    /* strstart never exceeds WSIZE-MAX_MATCH, so there are
+                     * always MIN_MATCH bytes ahead.
+                     */
+                } while (--s->match_length != 0);
+                s->strstart++;  /* strstart is now just past the match */
+            } else
+#endif
+            {
+                s->strstart += s->match_length;
+                s->match_length = 0;
+                s->ins_h = s->window[s->strstart];
+                UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+#if MIN_MATCH != 3
+                Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+                /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
+                 * matter since it will be recomputed at the next deflate call.
+                 */
+            }
+        } else {
+            /* No match, output a literal byte */
+            Tracevv((stderr,"%c", s->window[s->strstart]));
+            _tr_tally_lit (s, s->window[s->strstart], bflush);
+            s->lookahead--;
+            s->strstart++;
+        }
+        if (bflush) FLUSH_BLOCK(s, 0);
+    }
+    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
+    if (flush == Z_FINISH) {
+        FLUSH_BLOCK(s, 1);
+        return finish_done;
+    }
+    if (s->last_lit)            /* l_buf holds tallied symbols */
+        FLUSH_BLOCK(s, 0);
+    return block_done;
+}
+
+#ifndef FASTEST
+/* ===========================================================================
+ * Same contract as deflate_fast(), but achieves better compression. We use
+ * lazy evaluation for matches: a match is finally adopted only if there is
+ * no better match at the next window position.
+ */
+local block_state deflate_slow(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    IPos hash_head;          /* head of hash chain */
+    int bflush;              /* set if current block must be flushed */
+
+    /* Process the input block. */
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the next match, plus MIN_MATCH bytes to insert the
+         * string following the next match.
+         */
+        if (s->lookahead < MIN_LOOKAHEAD) {
+            fill_window(s);
+            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+                return need_more;
+            }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* Insert the string window[strstart .. strstart+2] in the
+         * dictionary, and set hash_head to the head of the hash chain:
+         */
+        hash_head = NIL;
+        if (s->lookahead >= MIN_MATCH) {
+            INSERT_STRING(s, s->strstart, hash_head);
+        }
+
+        /* Remember the previous step's match, then look for the longest
+         * match at this position, discarding those <= prev_length. */
+        s->prev_length = s->match_length, s->prev_match = s->match_start;
+        s->match_length = MIN_MATCH-1;
+
+        if (hash_head != NIL && s->prev_length < s->max_lazy_match &&
+            s->strstart - hash_head <= MAX_DIST(s)) {
+            /* To simplify the code, we prevent matches with the string
+             * of window index 0 (in particular we have to avoid a match
+             * of the string with itself at the start of the input file).
+             */
+            s->match_length = longest_match (s, hash_head);
+            /* longest_match() sets match_start */
+
+            if (s->match_length <= 5 && (s->strategy == Z_FILTERED
+#if TOO_FAR <= 32767
+                || (s->match_length == MIN_MATCH &&
+                    s->strstart - s->match_start > TOO_FAR)
+#endif
+                )) {
+
+                /* If prev_match is also MIN_MATCH, match_start is garbage
+                 * but we will ignore the current match anyway.
+                 */
+                s->match_length = MIN_MATCH-1;
+            }
+        }
+        /* If there was a match at the previous step and the current
+         * match is not better, output the previous match:
+         */
+        if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
+            uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
+            /* Do not insert strings in hash table beyond this. */
+
+            check_match(s, s->strstart-1, s->prev_match, s->prev_length);
+
+            _tr_tally_dist(s, s->strstart -1 - s->prev_match,
+                           s->prev_length - MIN_MATCH, bflush);
+
+            /* Insert in hash table all strings up to the end of the match.
+             * strstart-1 and strstart are already inserted. If there is not
+             * enough lookahead, the last two strings are not inserted in
+             * the hash table.
+             */
+            s->lookahead -= s->prev_length-1;  /* 1st byte already consumed */
+            s->prev_length -= 2;
+            do {
+                if (++s->strstart <= max_insert) {
+                    INSERT_STRING(s, s->strstart, hash_head);
+                }
+            } while (--s->prev_length != 0);
+            s->match_available = 0;
+            s->match_length = MIN_MATCH-1;
+            s->strstart++;
+
+            if (bflush) FLUSH_BLOCK(s, 0);
+
+        } else if (s->match_available) {
+            /* If there was no match at the previous position, output a
+             * single literal. If there was a match but the current match
+             * is longer, truncate the previous match to a single literal.
+             */
+            Tracevv((stderr,"%c", s->window[s->strstart-1]));
+            _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+            if (bflush) {
+                FLUSH_BLOCK_ONLY(s, 0);
+            }
+            s->strstart++;
+            s->lookahead--;
+            if (s->strm->avail_out == 0) return need_more;  /* output full */
+        } else {
+            /* There is no previous match to compare with, wait for
+             * the next step to decide.
+             */
+            s->match_available = 1;
+            s->strstart++;
+            s->lookahead--;
+        }
+    }
+    Assert (flush != Z_NO_FLUSH, "no flush?");
+    if (s->match_available) {   /* emit the literal still being held back */
+        Tracevv((stderr,"%c", s->window[s->strstart-1]));
+        _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+        s->match_available = 0;
+    }
+    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
+    if (flush == Z_FINISH) {
+        FLUSH_BLOCK(s, 1);
+        return finish_done;
+    }
+    if (s->last_lit)            /* l_buf holds tallied symbols */
+        FLUSH_BLOCK(s, 0);
+    return block_done;
+}
+#endif /* FASTEST */
+
+/* ===========================================================================
+ * For Z_RLE, simply look for runs of bytes, and generate matches only of
+ * distance one.  Do not maintain a hash table.  (It will be regenerated if
+ * this run of deflate switches away from Z_RLE.)
+ */
+local block_state deflate_rle(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    int bflush;             /* set if current block must be flushed */
+    uInt prev;              /* byte at distance one to match */
+    Bytef *scan, *strend;   /* scan goes up to strend for length of run */
+
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the longest run, plus one for the unrolled loop.
+         */
+        if (s->lookahead <= MAX_MATCH) {
+            fill_window(s);
+            if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) {
+                return need_more;
+            }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* See how many times the previous byte repeats */
+        s->match_length = 0;
+        if (s->lookahead >= MIN_MATCH && s->strstart > 0) {
+            scan = s->window + s->strstart - 1;
+            prev = *scan;
+            if (prev == *++scan && prev == *++scan && prev == *++scan) {
+                strend = s->window + s->strstart + MAX_MATCH;
+                do {    /* empty body: the condition does the scanning */
+                } while (prev == *++scan && prev == *++scan &&
+                         prev == *++scan && prev == *++scan &&
+                         prev == *++scan && prev == *++scan &&
+                         prev == *++scan && prev == *++scan &&
+                         scan < strend);
+                s->match_length = MAX_MATCH - (uInt)(strend - scan);
+                if (s->match_length > s->lookahead)
+                    s->match_length = s->lookahead;  /* clamp to input */
+            }
+            Assert(scan <= s->window+(uInt)(s->window_size-1), "wild scan");
+        }
+
+        /* Emit match if have run of MIN_MATCH or longer, else emit literal */
+        if (s->match_length >= MIN_MATCH) {
+            check_match(s, s->strstart, s->strstart - 1, s->match_length);
+
+            _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush);
+
+            s->lookahead -= s->match_length;
+            s->strstart += s->match_length;
+            s->match_length = 0;
+        } else {
+            /* No match, output a literal byte */
+            Tracevv((stderr,"%c", s->window[s->strstart]));
+            _tr_tally_lit (s, s->window[s->strstart], bflush);
+            s->lookahead--;
+            s->strstart++;
+        }
+        if (bflush) FLUSH_BLOCK(s, 0);
+    }
+    s->insert = 0;      /* nothing to insert: no hash table is maintained */
+    if (flush == Z_FINISH) {
+        FLUSH_BLOCK(s, 1);
+        return finish_done;
+    }
+    if (s->last_lit)            /* l_buf holds tallied symbols */
+        FLUSH_BLOCK(s, 0);
+    return block_done;
+}
+
+/* ===========================================================================
+ * For Z_HUFFMAN_ONLY, do not look for matches: every input byte is tallied
+ * as a literal.  Do not maintain a hash table.  (It will be regenerated if
+ * this run of deflate switches away from Huffman.) */
+local block_state deflate_huff(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    int bflush;             /* set if current block must be flushed */
+
+    for (;;) {
+        /* Make sure that we have a literal to write. */
+        if (s->lookahead == 0) {
+            fill_window(s);
+            if (s->lookahead == 0) {
+                if (flush == Z_NO_FLUSH)
+                    return need_more;
+                break;      /* flush the current block */
+            }
+        }
+
+        /* Output a literal byte */
+        s->match_length = 0;
+        Tracevv((stderr,"%c", s->window[s->strstart]));
+        _tr_tally_lit (s, s->window[s->strstart], bflush);
+        s->lookahead--;
+        s->strstart++;
+        if (bflush) FLUSH_BLOCK(s, 0);
+    }
+    s->insert = 0;      /* nothing to insert: no hash table is maintained */
+    if (flush == Z_FINISH) {
+        FLUSH_BLOCK(s, 1);
+        return finish_done;
+    }
+    if (s->last_lit)            /* l_buf holds tallied symbols */
+        FLUSH_BLOCK(s, 0);
+    return block_done;
+}
diff --git a/libraries/zlib/deflate.h b/libraries/zlib/deflate.h
new file mode 100644
index 000000000..23ecdd312
--- /dev/null
+++ b/libraries/zlib/deflate.h
@@ -0,0 +1,349 @@
+/* deflate.h -- internal compression state
+ * Copyright (C) 1995-2016 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* @(#) $Id$ */
+
+#ifndef DEFLATE_H
+#define DEFLATE_H
+
+#include "zutil.h"
+
+/* define NO_GZIP when compiling if you want to disable gzip header and
+   trailer creation by deflate().  NO_GZIP would be used to avoid linking in
+   the crc code when it is not needed.  For shared libraries, gzip encoding
+   should be left enabled. */
+#ifndef NO_GZIP
+#  define GZIP
+#endif
+
+/* ===========================================================================
+ * Internal compression state.
+ */
+
+#define LENGTH_CODES 29
+/* number of length codes, not counting the special END_BLOCK code */
+
+#define LITERALS  256
+/* number of literal bytes 0..255 */
+
+#define L_CODES (LITERALS+1+LENGTH_CODES)
+/* number of literal or length codes, including the END_BLOCK code */
+
+#define D_CODES   30
+/* number of distance codes */
+
+#define BL_CODES  19
+/* number of codes used to transfer the bit lengths */
+
+#define HEAP_SIZE (2*L_CODES+1)
+/* maximum heap size */
+
+#define MAX_BITS 15
+/* No code may exceed MAX_BITS bits */
+
+#define Buf_size 16
+/* size of bit buffer in bi_buf */
+
+#define INIT_STATE    42    /* zlib header -> BUSY_STATE */
+#ifdef GZIP
+#  define GZIP_STATE  57    /* gzip header -> BUSY_STATE | EXTRA_STATE */
+#endif
+#define EXTRA_STATE   69    /* gzip extra block -> NAME_STATE */
+#define NAME_STATE    73    /* gzip file name -> COMMENT_STATE */
+#define COMMENT_STATE 91    /* gzip comment -> HCRC_STATE */
+#define HCRC_STATE   103    /* gzip header CRC -> BUSY_STATE */
+#define BUSY_STATE   113    /* deflate -> FINISH_STATE */
+#define FINISH_STATE 666    /* stream complete */
+/* Stream status values; each comment reads "what is written -> next state" */
+
+
+/* Data structure describing a single value and its code string. */
+typedef struct ct_data_s {
+    union {
+        ush  freq;       /* frequency count */
+        ush  code;       /* bit string */
+    } fc;                /* freq and code share the same storage */
+    union {
+        ush  dad;        /* parent node in Huffman tree */
+        ush  len;        /* length of bit string */
+    } dl;                /* dad and len share the same storage */
+} FAR ct_data;
+
+#define Freq fc.freq     /* shorthand for the union members above */
+#define Code fc.code
+#define Dad  dl.dad
+#define Len  dl.len
+
+typedef struct static_tree_desc_s  static_tree_desc;
+
+typedef struct tree_desc_s {
+    ct_data *dyn_tree;           /* the dynamic tree */
+    int     max_code;            /* largest code with non-zero frequency */
+    const static_tree_desc *stat_desc;  /* descriptor of the static tree */
+} FAR tree_desc;
+
+typedef ush Pos;
+typedef Pos FAR Posf;
+typedef unsigned IPos;
+
+/* A Pos is an index in the character window, kept as ush to save space in
+ * the tables. IPos is for parameters and other uses (hash_head, prev_match).
+ */
+
+typedef struct internal_state {
+    z_streamp strm;      /* pointer back to this zlib stream */
+    int   status;        /* as the name implies */
+    Bytef *pending_buf;  /* output still pending */
+    ulg   pending_buf_size; /* size of pending_buf */
+    Bytef *pending_out;  /* next pending byte to output to the stream */
+    ulg   pending;       /* nb of bytes in the pending buffer */
+    int   wrap;          /* bit 0 true for zlib, bit 1 true for gzip */
+    gz_headerp  gzhead;  /* gzip header information to write */
+    ulg   gzindex;       /* where in extra, name, or comment */
+    Byte  method;        /* can only be DEFLATED */
+    int   last_flush;    /* value of flush param for previous deflate call */
+
+                /* used by deflate.c: */
+
+    uInt  w_size;        /* LZ77 window size (32K by default) */
+    uInt  w_bits;        /* log2(w_size)  (8..16) */
+    uInt  w_mask;        /* w_size - 1 */
+
+    Bytef *window;
+    /* Sliding window. Input bytes are read into the second half of the window,
+     * and move to the first half later to keep a dictionary of at least wSize
+     * bytes. With this organization, matches are limited to a distance of
+     * wSize-MAX_MATCH bytes, but this ensures that IO is always
+     * performed with a length multiple of the block size. Also, it limits
+     * the window size to 64K, which is quite useful on MSDOS.
+     * To do: use the user input buffer as sliding window.
+     */
+
+    ulg window_size;
+    /* Actual size of window: 2*wSize, except when the user input buffer
+     * is directly used as sliding window.
+     */
+
+    Posf *prev;
+    /* Link to older string with same hash index. To limit the size of this
+     * array to 64K, this link is maintained only for the last 32K strings.
+     * An index in this array is thus a window index modulo 32K.
+     */
+
+    Posf *head; /* Heads of the hash chains or NIL. */
+
+    uInt  ins_h;          /* hash index of string to be inserted */
+    uInt  hash_size;      /* number of elements in hash table */
+    uInt  hash_bits;      /* log2(hash_size) */
+    uInt  hash_mask;      /* hash_size-1 */
+
+    uInt  hash_shift;
+    /* Number of bits by which ins_h must be shifted at each input
+     * step. It must be such that after MIN_MATCH steps, the oldest
+     * byte no longer takes part in the hash key, that is:
+     *   hash_shift * MIN_MATCH >= hash_bits
+     */
+
+    long block_start;
+    /* Window position at the beginning of the current output block. Gets
+     * negative when the window is moved backwards.
+     */
+
+    uInt match_length;           /* length of best match */
+    IPos prev_match;             /* start of previous match */
+    int match_available;         /* set if previous match exists */
+    uInt strstart;               /* start of string to insert */
+    uInt match_start;            /* start of matching string */
+    uInt lookahead;              /* number of valid bytes ahead in window */
+
+    uInt prev_length;
+    /* Length of the best match at previous step. Matches not greater than this
+     * are discarded. This is used in the lazy match evaluation.
+     */
+
+    uInt max_chain_length;
+    /* To speed up deflation, hash chains are never searched beyond this
+     * length.  A higher limit improves compression ratio but degrades the
+     * speed.
+     */
+
+    uInt max_lazy_match;
+    /* Attempt to find a better match only when the current match is strictly
+     * smaller than this value. This mechanism is used only for compression
+     * levels >= 4.
+     */
+#   define max_insert_length  max_lazy_match
+    /* Insert new strings in the hash table only if the match length is not
+     * greater than this length. This saves time but degrades compression.
+     * max_insert_length is used only for compression levels <= 3.
+     */
+
+    int level;    /* compression level (0..9) */
+    int strategy; /* favor or force Huffman coding */
+
+    uInt good_match;
+    /* Use a faster search when the previous match is longer than this */
+
+    int nice_match; /* Stop searching when current match exceeds this */
+
+                /* used by trees.c: */
+    /* Didn't use ct_data typedef below to suppress compiler warning */
+    struct ct_data_s dyn_ltree[HEAP_SIZE];   /* literal and length tree */
+    struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
+    struct ct_data_s bl_tree[2*BL_CODES+1];  /* Huffman tree for bit lengths */
+
+    struct tree_desc_s l_desc;               /* desc. for literal tree */
+    struct tree_desc_s d_desc;               /* desc. for distance tree */
+    struct tree_desc_s bl_desc;              /* desc. for bit length tree */
+
+    ush bl_count[MAX_BITS+1];
+    /* number of codes at each bit length for an optimal tree */
+
+    int heap[2*L_CODES+1];      /* heap used to build the Huffman trees */
+    int heap_len;               /* number of elements in the heap */
+    int heap_max;               /* element of largest frequency */
+    /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
+     * The same heap array is used to build all trees.
+     */
+
+    uch depth[2*L_CODES+1];
+    /* Depth of each subtree used as tie breaker for trees of equal frequency
+     */
+
+    uchf *l_buf;          /* buffer for literals or lengths */
+
+    uInt  lit_bufsize;
+    /* Size of match buffer for literals/lengths.  There are 4 reasons for
+     * limiting lit_bufsize to 64K:
+     *   - frequencies can be kept in 16 bit counters
+     *   - if compression is not successful for the first block, all input
+     *     data is still in the window so we can still emit a stored block even
+     *     when input comes from standard input.  (This can also be done for
+     *     all blocks if lit_bufsize is not greater than 32K.)
+     *   - if compression is not successful for a file smaller than 64K, we can
+     *     even emit a stored file instead of a stored block (saving 5 bytes).
+     *     This is applicable only for zip (not gzip or zlib).
+     *   - creating new Huffman trees less frequently may not provide fast
+     *     adaptation to changes in the input data statistics. (Take for
+     *     example a binary file with poorly compressible code followed by
+     *     a highly compressible string table.) Smaller buffer sizes give
+     *     fast adaptation but have of course the overhead of transmitting
+     *     trees more frequently.
+     *   - I can't count above 4
+     */
+
+    uInt last_lit;      /* running index in l_buf and d_buf */
+
+    ushf *d_buf;
+    /* Buffer for distances. To simplify the code, d_buf and l_buf have
+     * the same number of elements. To use different lengths, an extra flag
+     * array would be necessary.
+     */
+
+    ulg opt_len;        /* bit length of current block with optimal trees */
+    ulg static_len;     /* bit length of current block with static trees */
+    uInt matches;       /* number of string matches in current block */
+    uInt insert;        /* bytes at end of window left to insert in hash */
+
+#ifdef ZLIB_DEBUG
+    ulg compressed_len; /* total bit length of compressed file mod 2^32 */
+    ulg bits_sent;      /* bit length of compressed data sent mod 2^32 */
+#endif
+
+    ush bi_buf;
+    /* Output buffer. bits are inserted starting at the bottom (least
+     * significant bits).
+     */
+    int bi_valid;
+    /* Number of valid bits in bi_buf.  All bits above the last valid bit
+     * are always zero.
+     */
+
+    ulg high_water;
+    /* High water mark offset in window for initialized bytes -- bytes above
+     * this are set to zero in order to avoid memory check warnings when
+     * longest match routines access bytes past the input.  This is then
+     * updated to the new high water mark.
+     */
+
+} FAR deflate_state;
+
+/* Output a byte on the stream: store c in pending_buf and advance pending.
+ * IN assertion: there is enough room in pending_buf. s is not parenthesized
+ * in the expansion, so pass a simple pointer expression. */
+#define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);}
+
+
+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
+/* Minimum amount of lookahead, except at the end of the input file.
+ * See deflate.c for comments about the MIN_MATCH+1.
+ */
+
+#define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD)
+/* In order to simplify the code, particularly on 16 bit machines, match
+ * distances are limited to MAX_DIST instead of WSIZE.
+ */
+
+#define WIN_INIT MAX_MATCH
+/* Number of bytes after end of data in window to initialize in order to avoid
+   memory checker errors from longest match routines */
+
+        /* in trees.c */
+void ZLIB_INTERNAL _tr_init OF((deflate_state *s));
+int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
+void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf,
+                        ulg stored_len, int last));
+void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s));
+void ZLIB_INTERNAL _tr_align OF((deflate_state *s));
+void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
+                        ulg stored_len, int last));
+
+#define d_code(dist) \
+   ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
+/* Mapping from a distance to a distance code. dist is the distance - 1 and
+ * must not have side effects, since the macro evaluates it more than once.
+ * _dist_code[256] and _dist_code[257] are never used.
+ */
+
+#ifndef ZLIB_DEBUG
+/* Inline _tr_tally for speed; flush is set once l_buf is one short of full: */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+  extern uch ZLIB_INTERNAL _length_code[];
+  extern uch ZLIB_INTERNAL _dist_code[];
+#else
+  extern const uch ZLIB_INTERNAL _length_code[];
+  extern const uch ZLIB_INTERNAL _dist_code[];
+#endif
+
+# define _tr_tally_lit(s, c, flush) \
+  { uch cc = (c); \
+    s->d_buf[s->last_lit] = 0;  /* a literal is stored with distance 0 */ \
+    s->l_buf[s->last_lit++] = cc; \
+    s->dyn_ltree[cc].Freq++; \
+    flush = (s->last_lit == s->lit_bufsize-1); \
+   }
+# define _tr_tally_dist(s, distance, length, flush) \
+  { uch len = (uch)(length); \
+    ush dist = (ush)(distance); \
+    s->d_buf[s->last_lit] = dist; \
+    s->l_buf[s->last_lit++] = len; \
+    dist--;  /* d_code() takes the distance - 1 */ \
+    s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
+    s->dyn_dtree[d_code(dist)].Freq++; \
+    flush = (s->last_lit == s->lit_bufsize-1); \
+  }
+#else  /* ZLIB_DEBUG: use the out-of-line _tr_tally() instead */
+# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
+# define _tr_tally_dist(s, distance, length, flush) \
+              flush = _tr_tally(s, distance, length)
+#endif
+
+#endif /* DEFLATE_H */
diff --git a/libraries/zlib/example.c b/libraries/zlib/example.c
new file mode 100644
index 000000000..604736f15
--- /dev/null
+++ b/libraries/zlib/example.c
@@ -0,0 +1,565 @@
+/* example.c -- usage example of the zlib compression library
+ * Copyright (C) 1995-2006 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#include "zlib.h"
+#include <stdio.h>
+
+#ifdef STDC
+#  include <string.h>
+#  include <stdlib.h>
+#endif
+
+#if defined(VMS) || defined(RISCOS)
+#  define TESTFILE "foo-gz"
+#else
+#  define TESTFILE "foo.gz"
+#endif
+
+#define CHECK_ERR(err, msg) { \
+    if (err != Z_OK) { \
+        fprintf(stderr, "%s error: %d\n", msg, err); \
+        exit(1); \
+    } \
+}
+
+const char hello[] = "hello, hello!";
+/* "hello world" would be more standard, but the repeated "hello"
+ * stresses the compression code better, sorry...
+ */
+
+const char dictionary[] = "hello";
+uLong dictId; /* Adler32 value of the dictionary */
+
+void test_compress      OF((Byte *compr, uLong comprLen,
+                            Byte *uncompr, uLong uncomprLen));
+void test_gzio          OF((const char *fname,
+                            Byte *uncompr, uLong uncomprLen));
+void test_deflate       OF((Byte *compr, uLong comprLen));
+void test_inflate       OF((Byte *compr, uLong comprLen,
+                            Byte *uncompr, uLong uncomprLen));
+void test_large_deflate OF((Byte *compr, uLong comprLen,
+                            Byte *uncompr, uLong uncomprLen));
+void test_large_inflate OF((Byte *compr, uLong comprLen,
+                            Byte *uncompr, uLong uncomprLen));
+void test_flush         OF((Byte *compr, uLong *comprLen));
+void test_sync          OF((Byte *compr, uLong comprLen,
+                            Byte *uncompr, uLong uncomprLen));
+void test_dict_deflate  OF((Byte *compr, uLong comprLen));
+void test_dict_inflate  OF((Byte *compr, uLong comprLen,
+                            Byte *uncompr, uLong uncomprLen));
+int  main               OF((int argc, char *argv[]));
+
+/* ===========================================================================
+ * Round-trip hello[] through compress() and uncompress()
+ */
+void test_compress(compr, comprLen, uncompr, uncomprLen)
+    Byte *compr, *uncompr;
+    uLong comprLen, uncomprLen;
+{
+    uLong srcLen = (uLong)strlen(hello)+1;    /* include the trailing NUL */
+    int err = compress(compr, &comprLen, (const Bytef*)hello, srcLen);
+
+    CHECK_ERR(err, "compress");
+
+    /* poison the output so stale contents cannot pass for a result */
+    strcpy((char*)uncompr, "garbage");
+
+    err = uncompress(uncompr, &uncomprLen, compr, comprLen);
+    CHECK_ERR(err, "uncompress");
+
+    if (strcmp((char*)uncompr, hello) != 0) {
+        fprintf(stderr, "bad uncompress\n");
+        exit(1);
+    }
+    /* only reached when the round trip matched */
+    printf("uncompress(): %s\n", (char *)uncompr);
+}
+
+/* ===========================================================================
+ * Write hello[] to a .gz file with the gz* functions, then read it back
+ */
+void test_gzio(fname, uncompr, uncomprLen)
+    const char *fname; /* compressed file name */
+    Byte *uncompr;
+    uLong uncomprLen;
+{
+#ifdef NO_GZCOMPRESS
+    fprintf(stderr, "NO_GZCOMPRESS -- gz* functions cannot compress\n");
+#else
+    gzFile file;
+    z_off_t pos;
+    int err;
+    int len = (int)strlen(hello)+1;     /* bytes expected back, NUL included */
+
+    /* Writing: 'h', then "ello", then ", hello!", then a seek of one byte. */
+    file = gzopen(fname, "wb");
+    if (file == NULL) {
+        fprintf(stderr, "gzopen error\n");
+        exit(1);
+    }
+    gzputc(file, 'h');
+    if (gzputs(file, "ello") != 4) {
+        fprintf(stderr, "gzputs err: %s\n", gzerror(file, &err));
+        exit(1);
+    }
+    if (gzprintf(file, ", %s!", "hello") != 8) {
+        fprintf(stderr, "gzprintf err: %s\n", gzerror(file, &err));
+        exit(1);
+    }
+    gzseek(file, 1L, SEEK_CUR); /* add one zero byte */
+    gzclose(file);
+
+    /* Reading: the whole payload must come back from a single gzread(). */
+    file = gzopen(fname, "rb");
+    if (file == NULL) {
+        fprintf(stderr, "gzopen error\n");
+        exit(1);
+    }
+    strcpy((char*)uncompr, "garbage");
+    if (gzread(file, uncompr, (unsigned)uncomprLen) != len) {
+        fprintf(stderr, "gzread err: %s\n", gzerror(file, &err));
+        exit(1);
+    }
+    if (strcmp((char*)uncompr, hello) != 0) {
+        fprintf(stderr, "bad gzread: %s\n", (char*)uncompr);
+        exit(1);
+    }
+    printf("gzread(): %s\n", (char*)uncompr);
+
+    /* Step back 8 bytes; both gzseek() and gztell() must then report 6. */
+    pos = gzseek(file, -8L, SEEK_CUR);
+    if (pos != 6 || gztell(file) != pos) {
+        fprintf(stderr, "gzseek error, pos=%ld, gztell=%ld\n",
+                (long)pos, (long)gztell(file));
+        exit(1);
+    }
+
+    if (gzgetc(file) != ' ') {
+        fprintf(stderr, "gzgetc error\n");
+        exit(1);
+    }
+
+    if (gzungetc(' ', file) != ' ') {
+        fprintf(stderr, "gzungetc error\n");
+        exit(1);
+    }
+
+    gzgets(file, (char*)uncompr, (int)uncomprLen);
+    if (strlen((char*)uncompr) != 7) { /* " hello!" */
+        fprintf(stderr, "gzgets err after gzseek: %s\n", gzerror(file, &err));
+        exit(1);
+    }
+    if (strcmp((char*)uncompr, hello + 6) != 0) {
+        fprintf(stderr, "bad gzgets after gzseek\n");
+        exit(1);
+    }
+    printf("gzgets() after gzseek: %s\n", (char*)uncompr);
+
+    gzclose(file);
+#endif
+}
+
+/* ===========================================================================
+ * Test deflate() with small buffers: one byte of input and output per call
+ */
+void test_deflate(compr, comprLen)
+    Byte *compr;
+    uLong comprLen;
+{
+    z_stream strm;                          /* compression stream */
+    uLong srcLen = (uLong)strlen(hello)+1;  /* the NUL is compressed too */
+    int err;
+
+    strm.zalloc = (alloc_func)0;
+    strm.zfree = (free_func)0;
+    strm.opaque = (voidpf)0;
+
+    err = deflateInit(&strm, Z_DEFAULT_COMPRESSION);
+    CHECK_ERR(err, "deflateInit");
+
+    strm.next_in  = (Bytef*)hello;
+    strm.next_out = compr;
+
+    /* Feed input until all of it is consumed or compr is full. */
+    while (strm.total_in != srcLen && strm.total_out < comprLen) {
+        strm.avail_out = 1;                 /* force small buffers */
+        strm.avail_in = 1;
+        err = deflate(&strm, Z_NO_FLUSH);
+        CHECK_ERR(err, "deflate");
+    }
+    /* Finish the stream, still forcing small buffers: */
+    do {
+        strm.avail_out = 1;
+        err = deflate(&strm, Z_FINISH);
+        if (err != Z_STREAM_END) CHECK_ERR(err, "deflate");
+    } while (err != Z_STREAM_END);
+    err = deflateEnd(&strm);
+    CHECK_ERR(err, "deflateEnd");
+}
+
+/* ===========================================================================
+ * Test inflate() with small buffers; the result must equal hello[]
+ */
+void test_inflate(compr, comprLen, uncompr, uncomprLen)
+    Byte *compr, *uncompr;
+    uLong comprLen, uncomprLen;
+{
+    z_stream strm;      /* decompression stream */
+    int err;
+
+    strcpy((char*)uncompr, "garbage");
+
+    strm.zalloc = (alloc_func)0;
+    strm.zfree = (free_func)0;
+    strm.opaque = (voidpf)0;
+    strm.next_in  = compr;
+    strm.avail_in = 0;
+    strm.next_out = uncompr;
+
+    err = inflateInit(&strm);
+    CHECK_ERR(err, "inflateInit");
+
+    /* err is Z_OK here; stop at end of stream or when a buffer is used up */
+    while (err != Z_STREAM_END &&
+           strm.total_out < uncomprLen && strm.total_in < comprLen) {
+        strm.avail_out = 1;                 /* force small buffers */
+        strm.avail_in = 1;
+        err = inflate(&strm, Z_NO_FLUSH);
+        if (err != Z_STREAM_END) CHECK_ERR(err, "inflate");
+    }
+
+    err = inflateEnd(&strm);
+    CHECK_ERR(err, "inflateEnd");
+
+    if (strcmp((char*)uncompr, hello) != 0) {
+        fprintf(stderr, "bad inflate\n");
+        exit(1);
+    }
+    printf("inflate(): %s\n", (char *)uncompr);
+}
+
+/* ===========================================================================
+ * Test deflate() with large buffers and dynamic change of compression level
+ */
+void test_large_deflate(compr, comprLen, uncompr, uncomprLen)
+    Byte *compr, *uncompr;
+    uLong comprLen, uncomprLen;
+{
+    z_stream c_stream; /* compression stream */
+    int err;
+
+    c_stream.zalloc = (alloc_func)0;
+    c_stream.zfree = (free_func)0;
+    c_stream.opaque = (voidpf)0;
+
+    err = deflateInit(&c_stream, Z_BEST_SPEED);
+    CHECK_ERR(err, "deflateInit");
+
+    c_stream.next_out = compr;
+    c_stream.avail_out = (uInt)comprLen;
+
+    /* uncompr is still mostly zeroes here, so it should compress very well: */
+    c_stream.next_in = uncompr;
+    c_stream.avail_in = (uInt)uncomprLen;
+    err = deflate(&c_stream, Z_NO_FLUSH);
+    CHECK_ERR(err, "deflate");
+    if (c_stream.avail_in != 0) {
+        fprintf(stderr, "deflate not greedy\n");
+        exit(1);
+    }
+
+    /* Feed in already compressed data and switch to no compression: */
+    err = deflateParams(&c_stream, Z_NO_COMPRESSION, Z_DEFAULT_STRATEGY);
+    CHECK_ERR(err, "deflateParams");    /* a failed switch must not go unseen */
+    c_stream.next_in = compr;
+    c_stream.avail_in = (uInt)comprLen/2;
+    err = deflate(&c_stream, Z_NO_FLUSH);
+    CHECK_ERR(err, "deflate");
+
+    /* Switch back to compressing mode: */
+    err = deflateParams(&c_stream, Z_BEST_COMPRESSION, Z_FILTERED);
+    CHECK_ERR(err, "deflateParams");
+    c_stream.next_in = uncompr;
+    c_stream.avail_in = (uInt)uncomprLen;
+    err = deflate(&c_stream, Z_NO_FLUSH);
+    CHECK_ERR(err, "deflate");
+
+    err = deflate(&c_stream, Z_FINISH);
+    if (err != Z_STREAM_END) {
+        fprintf(stderr, "deflate should report Z_STREAM_END\n");
+        exit(1);
+    }
+    err = deflateEnd(&c_stream);
+    CHECK_ERR(err, "deflateEnd");
+}
+
+/* ===========================================================================
+ * Test inflate() with large buffers; only the total output size is checked
+ */
+void test_large_inflate(compr, comprLen, uncompr, uncomprLen)
+    Byte *compr, *uncompr;
+    uLong comprLen, uncomprLen;
+{
+    int err;
+    z_stream d_stream; /* decompression stream */
+
+    strcpy((char*)uncompr, "garbage");
+
+    d_stream.zalloc = (alloc_func)0;
+    d_stream.zfree = (free_func)0;
+    d_stream.opaque = (voidpf)0;
+
+    d_stream.next_in  = compr;
+    d_stream.avail_in = (uInt)comprLen;
+
+    err = inflateInit(&d_stream);
+    CHECK_ERR(err, "inflateInit");
+
+    for (;;) {
+        d_stream.next_out = uncompr;            /* discard the output */
+        d_stream.avail_out = (uInt)uncomprLen;
+        err = inflate(&d_stream, Z_NO_FLUSH);
+        if (err == Z_STREAM_END) break;
+        CHECK_ERR(err, "large inflate");
+    }
+
+    err = inflateEnd(&d_stream);
+    CHECK_ERR(err, "inflateEnd");
+    /* test_large_deflate() fed uncompr twice and comprLen/2 bytes of compr */
+    if (d_stream.total_out != 2*uncomprLen + comprLen/2) {
+        /* total_out is a uLong, so it is printed with %lu */
+        fprintf(stderr, "bad large inflate: %lu\n", d_stream.total_out);
+        exit(1);
+    }
+    printf("large_inflate(): OK\n");
+}
+
+/* ===========================================================================
+ * Test deflate() with full flush; *comprLen receives the compressed size
+ */
+void test_flush(compr, comprLen)
+    Byte *compr;
+    uLong *comprLen;
+{
+    z_stream strm;      /* compression stream */
+    uInt srcLen = (uInt)strlen(hello)+1;
+    int err;
+
+    strm.zalloc = (alloc_func)0;
+    strm.zfree = (free_func)0;
+    strm.opaque = (voidpf)0;
+
+    err = deflateInit(&strm, Z_DEFAULT_COMPRESSION);
+    CHECK_ERR(err, "deflateInit");
+
+    /* First pass: only the first 3 input bytes, ended by a full flush. */
+    strm.next_out = compr;
+    strm.avail_out = (uInt)*comprLen;
+    strm.next_in  = (Bytef*)hello;
+    strm.avail_in = 3;
+    err = deflate(&strm, Z_FULL_FLUSH);
+    CHECK_ERR(err, "deflate");
+
+    compr[3] += 1; /* force an error in first compressed block */
+
+    /* Second pass: the remaining input, then finish the stream. */
+    strm.avail_in = srcLen - 3;
+    err = deflate(&strm, Z_FINISH);
+    if (err != Z_STREAM_END) CHECK_ERR(err, "deflate");
+    err = deflateEnd(&strm);
+    CHECK_ERR(err, "deflateEnd");
+
+    *comprLen = strm.total_out;
+}
+
+/* ===========================================================================
+ * Test inflateSync() on the stream that test_flush() damaged on purpose
+ */
+void test_sync(compr, comprLen, uncompr, uncomprLen)
+    Byte *compr, *uncompr;
+    uLong comprLen, uncomprLen;
+{
+    int err;
+    z_stream d_stream; /* decompression stream */
+
+    strcpy((char*)uncompr, "garbage");
+
+    d_stream.zalloc = (alloc_func)0;
+    d_stream.zfree = (free_func)0;
+    d_stream.opaque = (voidpf)0;
+
+    d_stream.next_in  = compr;
+    d_stream.avail_in = 2; /* just read the zlib header */
+
+    err = inflateInit(&d_stream);
+    CHECK_ERR(err, "inflateInit");
+
+    d_stream.next_out = uncompr;
+    d_stream.avail_out = (uInt)uncomprLen;
+    /* keep the result: CHECK_ERR would otherwise re-test inflateInit's value */
+    err = inflate(&d_stream, Z_NO_FLUSH);
+    CHECK_ERR(err, "inflate");
+
+    d_stream.avail_in = (uInt)comprLen-2;   /* read all compressed data */
+    err = inflateSync(&d_stream);           /* but skip the damaged part */
+    CHECK_ERR(err, "inflateSync");
+
+    err = inflate(&d_stream, Z_FINISH);
+    if (err != Z_DATA_ERROR) {
+        fprintf(stderr, "inflate should report DATA_ERROR\n");
+        /* Because of incorrect adler32 */
+        exit(1);
+    }
+    err = inflateEnd(&d_stream);
+    CHECK_ERR(err, "inflateEnd");
+
+    printf("after inflateSync(): hel%s\n", (char *)uncompr);
+}
+
+/* ===========================================================================
+ * Test deflate() with preset dictionary; records the dictionary id in dictId
+ */
+void test_dict_deflate(compr, comprLen)
+    Byte *compr;
+    uLong comprLen;
+{
+    z_stream strm;      /* compression stream */
+    int err;
+
+    strm.zalloc = (alloc_func)0;
+    strm.zfree = (free_func)0;
+    strm.opaque = (voidpf)0;
+
+    err = deflateInit(&strm, Z_BEST_COMPRESSION);
+    CHECK_ERR(err, "deflateInit");
+
+    err = deflateSetDictionary(&strm, (const Bytef*)dictionary,
+                               sizeof(dictionary));
+    CHECK_ERR(err, "deflateSetDictionary");
+    dictId = strm.adler;    /* compared against by test_dict_inflate() */
+
+    /* the whole of hello[], NUL included, goes through a single call */
+    strm.next_in = (Bytef*)hello;
+    strm.avail_in = (uInt)strlen(hello)+1;
+    strm.next_out = compr;
+    strm.avail_out = (uInt)comprLen;
+
+    err = deflate(&strm, Z_FINISH);
+    if (err != Z_STREAM_END) {
+        fprintf(stderr, "deflate should report Z_STREAM_END\n");
+        exit(1);
+    }
+    err = deflateEnd(&strm);
+    CHECK_ERR(err, "deflateEnd");
+}
+
+/* ===========================================================================
+ * Test inflate() with a preset dictionary; the result must equal hello[]
+ */
+void test_dict_inflate(compr, comprLen, uncompr, uncomprLen)
+    Byte *compr, *uncompr;
+    uLong comprLen, uncomprLen;
+{
+    int err;
+    z_stream d_stream; /* decompression stream */
+
+    strcpy((char*)uncompr, "garbage");
+
+    d_stream.zalloc = (alloc_func)0;
+    d_stream.zfree = (free_func)0;
+    d_stream.opaque = (voidpf)0;
+
+    d_stream.next_in  = compr;
+    d_stream.avail_in = (uInt)comprLen;
+
+    err = inflateInit(&d_stream);
+    CHECK_ERR(err, "inflateInit");
+
+    d_stream.next_out = uncompr;
+    d_stream.avail_out = (uInt)uncomprLen;
+    /* On Z_NEED_DICT the dictionary is supplied and the loop carries on. */
+    for (;;) {
+        err = inflate(&d_stream, Z_NO_FLUSH);
+        if (err == Z_STREAM_END) break;
+        if (err == Z_NEED_DICT) {
+            if (d_stream.adler != dictId) { /* id saved by test_dict_deflate() */
+                fprintf(stderr, "unexpected dictionary\n");
+                exit(1);
+            }
+            err = inflateSetDictionary(&d_stream, (const Bytef*)dictionary,
+                                       sizeof(dictionary));
+        }
+        CHECK_ERR(err, "inflate with dict");
+    }
+
+    err = inflateEnd(&d_stream);
+    CHECK_ERR(err, "inflateEnd");
+
+    if (strcmp((char*)uncompr, hello)) {
+        fprintf(stderr, "bad inflate with dict\n");
+        exit(1);
+    } else {
+        printf("inflate with dictionary: %s\n", (char *)uncompr);
+    }
+}
+
+/* ===========================================================================
+ * Usage:  example [file.gz]     (the gzio test file; defaults to TESTFILE)
+ */
+
+int main(argc, argv)
+    int argc;
+    char *argv[];
+{
+    static const char* myVersion = ZLIB_VERSION;
+    uLong comprLen = 10000*sizeof(int); /* don't overflow on MSDOS */
+    uLong uncomprLen = comprLen;
+    Byte *compr, *uncompr;
+    const char *gzName = argc > 1 ? argv[1] : TESTFILE;
+
+    /* a different first version character is fatal, any other mismatch is not */
+    if (zlibVersion()[0] != myVersion[0]) {
+        fprintf(stderr, "incompatible zlib version\n");
+        exit(1);
+    }
+    if (strcmp(zlibVersion(), ZLIB_VERSION) != 0) {
+        fprintf(stderr, "warning: different zlib version\n");
+    }
+
+    printf("zlib version %s = 0x%04x, compile flags = 0x%lx\n",
+            ZLIB_VERSION, ZLIB_VERNUM, zlibCompileFlags());
+
+    /* compr and uncompr are cleared to avoid reading uninitialized
+     * data and to ensure that uncompr compresses well. */
+    compr    = (Byte*)calloc((uInt)comprLen, 1);
+    uncompr  = (Byte*)calloc((uInt)uncomprLen, 1);
+    if (compr == Z_NULL || uncompr == Z_NULL) {
+        printf("out of memory\n");
+        exit(1);
+    }
+    test_compress(compr, comprLen, uncompr, uncomprLen);
+    test_gzio(gzName, uncompr, uncomprLen);
+
+    test_deflate(compr, comprLen);
+    test_inflate(compr, comprLen, uncompr, uncomprLen);
+
+    test_large_deflate(compr, comprLen, uncompr, uncomprLen);
+    test_large_inflate(compr, comprLen, uncompr, uncomprLen);
+
+    /* test_flush() overwrites comprLen with the size it wrote; restore after */
+    test_flush(compr, &comprLen);
+    test_sync(compr, comprLen, uncompr, uncomprLen);
+    comprLen = uncomprLen;
+
+    test_dict_deflate(compr, comprLen);
+    test_dict_inflate(compr, comprLen, uncompr, uncomprLen);
+
+    free(compr);
+    free(uncompr);
+
+    return 0;
+}
diff --git a/libraries/zlib/gzguts.h b/libraries/zlib/gzguts.h
new file mode 100644
index 000000000..990a4d251
--- /dev/null
+++ b/libraries/zlib/gzguts.h
@@ -0,0 +1,218 @@
+/* gzguts.h -- zlib internal header definitions for gz* operations
+ * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifdef _LARGEFILE64_SOURCE
+#  ifndef _LARGEFILE_SOURCE
+#    define _LARGEFILE_SOURCE 1
+#  endif
+#  ifdef _FILE_OFFSET_BITS
+#    undef _FILE_OFFSET_BITS
+#  endif
+#endif
+
+#ifdef HAVE_HIDDEN
+#  define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
+#else
+#  define ZLIB_INTERNAL
+#endif
+
+#include <stdio.h>
+#include "zlib.h"
+#ifdef STDC
+#  include <string.h>
+#  include <stdlib.h>
+#  include <limits.h>
+#endif
+
+#ifndef _POSIX_SOURCE
+#  define _POSIX_SOURCE
+#endif
+#include <fcntl.h>
+
+#ifdef _WIN32
+#  include <stddef.h>
+#endif
+
+#if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32)
+#  include <io.h>
+#endif
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#  define WIDECHAR
+#endif
+
+#ifdef WINAPI_FAMILY
+#  define open _open
+#  define read _read
+#  define write _write
+#  define close _close
+#endif
+
+#ifdef NO_DEFLATE       /* for compatibility with old definition */
+#  define NO_GZCOMPRESS
+#endif
+
+#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#if defined(__CYGWIN__)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+/* NOTE(review): "BORLANDC" above lacks underscores; __BORLANDC__ intended? */
+#ifndef HAVE_VSNPRINTF
+#  ifdef MSDOS
+/* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
+   but for now we just assume it doesn't. */
+#    define NO_vsnprintf
+#  endif
+#  ifdef __TURBOC__
+#    define NO_vsnprintf
+#  endif
+#  ifdef WIN32
+/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */
+#    if !defined(vsnprintf) && !defined(NO_vsnprintf)
+#      if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 )
+#         define vsnprintf _vsnprintf
+#      endif
+#    endif
+#  endif
+#  ifdef __SASC
+#    define NO_vsnprintf
+#  endif
+#  ifdef VMS
+#    define NO_vsnprintf
+#  endif
+#  ifdef __OS400__
+#    define NO_vsnprintf
+#  endif
+#  ifdef __MVS__
+#    define NO_vsnprintf
+#  endif
+#endif
+
+/* unlike snprintf (which is required in C99), _snprintf does not guarantee
+   null termination of the result -- however this is only used in gzlib.c where
+   the result is assured to fit in the space provided */
+#if defined(_MSC_VER) && _MSC_VER < 1900
+#  define snprintf _snprintf
+#endif
+
+#ifndef local
+#  define local static
+#endif
+/* since "static" is used to mean two completely different things in C, we
+   define "local" for the non-static meaning of "static", for readability
+   (compile with -Dlocal if your debugger can't find static symbols) */
+
+/* gz* functions always use library allocation functions */
+#ifndef STDC
+  extern voidp  malloc OF((uInt size));
+  extern void   free   OF((voidpf ptr));
+#endif
+
+/* get errno and strerror definition */
+#if defined UNDER_CE
+#  include <windows.h>
+#  define zstrerror() gz_strwinerror((DWORD)GetLastError())
+#else
+#  ifndef NO_STRERROR
+#    include <errno.h>
+#    define zstrerror() strerror(errno)
+#  else
+#    define zstrerror() "stdio error (consult errno)"
+#  endif
+#endif
+
+/* provide prototypes for these when building zlib without LFS */
+#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0
+    ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+    ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
+    ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
+    ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
+#endif
+
+/* default memLevel */
+#if MAX_MEM_LEVEL >= 8
+#  define DEF_MEM_LEVEL 8
+#else
+#  define DEF_MEM_LEVEL  MAX_MEM_LEVEL
+#endif
+
+/* default i/o buffer size -- double this for output when reading (this and
+   twice this must be able to fit in an unsigned type) */
+#define GZBUFSIZE 8192
+
+/* gzip modes, also provide a little integrity check on the passed structure */
+#define GZ_NONE 0
+#define GZ_READ 7247
+#define GZ_WRITE 31153
+#define GZ_APPEND 1     /* mode set to GZ_WRITE after the file is opened */
+
+/* values for gz_state how */
+#define LOOK 0      /* look for a gzip header */
+#define COPY 1      /* copy input directly */
+#define GZIP 2      /* decompress a gzip stream */
+
+/* internal gzip file state data structure */
+typedef struct {
+        /* exposed contents for gzgetc() macro */
+    struct gzFile_s x;      /* "x" for exposed */
+                            /* x.have: number of bytes available at x.next */
+                            /* x.next: next output data to deliver or write */
+                            /* x.pos: current position in uncompressed data */
+        /* used for both reading and writing */
+    int mode;               /* one of the GZ_* gzip modes above */
+    int fd;                 /* file descriptor */
+    char *path;             /* path or fd for error messages */
+    unsigned size;          /* buffer size, zero if not allocated yet */
+    unsigned want;          /* requested buffer size, default is GZBUFSIZE */
+    unsigned char *in;      /* input buffer (double-sized when writing) */
+    unsigned char *out;     /* output buffer (double-sized when reading) */
+    int direct;             /* 0 if processing gzip, 1 if transparent */
+        /* just for reading */
+    int how;                /* LOOK: get header, COPY: copy, GZIP: decompress */
+    z_off64_t start;        /* where the gzip data started, for rewinding */
+    int eof;                /* true if end of input file reached */
+    int past;               /* true if read requested past end */
+        /* just for writing */
+    int level;              /* compression level */
+    int strategy;           /* compression strategy */
+        /* seek request */
+    z_off64_t skip;         /* amount to skip (already rewound if backwards) */
+    int seek;               /* true if seek request pending */
+        /* error information */
+    int err;                /* error code */
+    char *msg;              /* error message */
+        /* zlib inflate or deflate stream */
+    z_stream strm;          /* stream structure in-place (not a pointer) */
+} gz_state;
+typedef gz_state FAR *gz_statep;    /* pointer form taken by gz_error() */
+
+/* shared functions */
+void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *));
+#if defined UNDER_CE
+char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error));
+#endif
+
+/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t
+   value -- needed when comparing unsigned to z_off64_t, which is signed
+   (possible z_off64_t types off_t, off64_t, and long are all signed) */
+#ifdef INT_MAX
+#  define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX)
+#else
+unsigned ZLIB_INTERNAL gz_intmax OF((void));
+#  define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax())
+#endif
diff --git a/libraries/zlib/infback.c b/libraries/zlib/infback.c
new file mode 100644
index 000000000..59679ecbf
--- /dev/null
+++ b/libraries/zlib/infback.c
@@ -0,0 +1,640 @@
+/* infback.c -- inflate using a call-back interface
+ * Copyright (C) 1995-2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+   This code is largely copied from inflate.c.  Normally either infback.o or
+   inflate.o would be linked into an application--not both.  The interface
+   with inffast.c is retained so that optimized assembler-coded versions of
+   inflate_fast() can be used with either inflate.c or infback.c.
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+/* function prototypes */
+local void fixedtables OF((struct inflate_state FAR *state));
+
+/*
+   Initialize strm for inflateBack().  strm provides memory allocation
+   functions in zalloc and zfree, or Z_NULL to use the library memory
+   allocation functions.  windowBits is in the range 8..15, and window is a
+   user-supplied window and output buffer that is 2**windowBits bytes.
+ */
+int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size)
+z_streamp strm;
+int windowBits;
+unsigned char FAR *window;
+const char *version;
+int stream_size;
+{
+    struct inflate_state FAR *state;
+
+    if (stream_size != (int)(sizeof(z_stream)) ||
+        version == Z_NULL || version[0] != ZLIB_VERSION[0])
+        return Z_VERSION_ERROR;
+    if (windowBits < 8 || windowBits > 15 ||
+        strm == Z_NULL || window == Z_NULL)
+        return Z_STREAM_ERROR;
+    strm->msg = Z_NULL;                 /* in case we return an error */
+    if (strm->zalloc == (alloc_func)0) {
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zalloc = zcalloc;
+        strm->opaque = (voidpf)0;
+#endif
+    }
+    if (strm->zfree == (free_func)0) {
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zfree = zcfree;
+#endif
+    }
+    state = (struct inflate_state FAR *)ZALLOC(strm, 1,
+                                               sizeof(struct inflate_state));
+    if (state == Z_NULL) return Z_MEM_ERROR;
+    Tracev((stderr, "inflate: allocated\n"));
+    strm->state = (struct internal_state FAR *)state;
+    state->window = window;             /* the caller's buffer is the window */
+    state->wbits = (uInt)windowBits;
+    state->wsize = 1U << windowBits;
+    state->wnext = 0;
+    state->whave = 0;
+    state->dmax = 32768U;
+    return Z_OK;
+}
+
+/*
+   Return state with length and distance decoding tables and index sizes set to
+   fixed code decoding.  Normally this returns fixed tables from inffixed.h.
+   If BUILDFIXED is defined, then instead this routine builds the tables the
+   first time it's called, and returns those tables the first time and
+   thereafter.  This reduces the size of the code by about 2K bytes, in
+   exchange for a little execution time.  However, BUILDFIXED should not be
+   used for threaded applications, since the rewriting of the tables and virgin
+   may not be thread-safe.
+ */
+local void fixedtables(state)
+struct inflate_state FAR *state;
+{
+#ifdef BUILDFIXED
+    static int virgin = 1;          /* nonzero until the tables are built */
+    static code *lenfix, *distfix;
+    static code fixed[544];         /* backing storage; lenfix is its start */
+
+    /* build fixed huffman tables if first call (may not be thread safe) */
+    if (virgin) {
+        unsigned sym, bits;
+        static code *next;
+
+        /* literal/length table */
+        sym = 0;
+        while (sym < 144) state->lens[sym++] = 8;
+        while (sym < 256) state->lens[sym++] = 9;
+        while (sym < 280) state->lens[sym++] = 7;
+        while (sym < 288) state->lens[sym++] = 8;
+        next = fixed;
+        lenfix = next;
+        bits = 9;
+        inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
+
+        /* distance table */
+        sym = 0;
+        while (sym < 32) state->lens[sym++] = 5;
+        distfix = next;     /* presumably advanced by the call above -- confirm */
+        bits = 5;
+        inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
+
+        /* do this just once */
+        virgin = 0;
+    }
+#else /* !BUILDFIXED */
+#   include "inffixed.h"    /* presumably defines lenfix and distfix */
+#endif /* BUILDFIXED */
+    state->lencode = lenfix;
+    state->lenbits = 9;
+    state->distcode = distfix;
+    state->distbits = 5;
+}
+
+/* Macros for inflateBack(): */
+
+/* Load returned state from inflate_fast() */
+#define LOAD() \
+    do { \
+        put = strm->next_out; \
+        left = strm->avail_out; \
+        next = strm->next_in; \
+        have = strm->avail_in; \
+        hold = state->hold; \
+        bits = state->bits; \
+    } while (0)
+
+/* Set state from registers for inflate_fast() */
+#define RESTORE() \
+    do { \
+        strm->next_out = put; \
+        strm->avail_out = left; \
+        strm->next_in = next; \
+        strm->avail_in = have; \
+        state->hold = hold; \
+        state->bits = bits; \
+    } while (0)
+
+/* Clear the input bit accumulator */
+#define INITBITS() \
+    do { \
+        hold = 0; \
+        bits = 0; \
+    } while (0)
+
+/* Assure that some input is available.  If input is requested, but denied,
+   then return a Z_BUF_ERROR from inflateBack(). */
+#define PULL() \
+    do { \
+        if (have == 0) { \
+            have = in(in_desc, &next); \
+            if (have == 0) { \
+                next = Z_NULL; \
+                ret = Z_BUF_ERROR; \
+                goto inf_leave; \
+            } \
+        } \
+    } while (0)
+
+/* Get a byte of input into the bit accumulator, or return from inflateBack()
+   with an error if there is no input available. */
+#define PULLBYTE() \
+    do { \
+        PULL(); \
+        have--; \
+        hold += (unsigned long)(*next++) << bits; \
+        bits += 8; \
+    } while (0)
+
+/* Assure that there are at least n bits in the bit accumulator.  If there is
+   not enough available input to do that, then return from inflateBack() with
+   an error. */
+#define NEEDBITS(n) \
+    do { \
+        while (bits < (unsigned)(n)) \
+            PULLBYTE(); \
+    } while (0)
+
+/* Return the low n bits of the bit accumulator (n < 16) */
+#define BITS(n) \
+    ((unsigned)hold & ((1U << (n)) - 1))
+
+/* Remove n bits from the bit accumulator */
+#define DROPBITS(n) \
+    do { \
+        hold >>= (n); \
+        bits -= (unsigned)(n); \
+    } while (0)
+
+/* Remove zero to seven bits as needed to go to a byte boundary */
+#define BYTEBITS() \
+    do { \
+        hold >>= bits & 7; \
+        bits -= bits & 7; \
+    } while (0)
+
+/* Assure that some output space is available, by writing out the window
+   if it's full.  If the write fails, return from inflateBack() with a
+   Z_BUF_ERROR. */
+#define ROOM() \
+    do { \
+        if (left == 0) { \
+            put = state->window; \
+            left = state->wsize; \
+            state->whave = left; \
+            if (out(out_desc, put, left)) { \
+                ret = Z_BUF_ERROR; \
+                goto inf_leave; \
+            } \
+        } \
+    } while (0)
+
+/*
+   strm provides the memory allocation functions and window buffer on input,
+   and provides information on the unused input on return.  For Z_DATA_ERROR
+   returns, strm will also provide an error message.
+
+   in() and out() are the call-back input and output functions.  When
+   inflateBack() needs more input, it calls in().  When inflateBack() has
+   filled the window with output, or when it completes with data in the
+   window, it calls out() to write out the data.  The application must not
+   change the provided input until in() is called again or inflateBack()
+   returns.  The application must not change the window/output buffer until
+   inflateBack() returns.
+
+   in() and out() are called with a descriptor parameter provided in the
+   inflateBack() call.  This parameter can be a structure that provides the
+   information required to do the read or write, as well as accumulated
+   information on the input and output such as totals and check values.
+
+   in() should return zero on failure.  out() should return non-zero on
+   failure.  If either in() or out() fails, then inflateBack() returns a
+   Z_BUF_ERROR.  strm->next_in can be checked for Z_NULL to see whether it
+   was in() or out() that caused the error.  Otherwise, inflateBack()
+   returns Z_STREAM_END on success, Z_DATA_ERROR for a deflate format
+   error, or Z_MEM_ERROR if it could not allocate memory for the state.
+   inflateBack() can also return Z_STREAM_ERROR if the input parameters
+   are not correct, i.e. strm is Z_NULL or the state was not initialized.
+ */
+int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc)
+z_streamp strm;
+in_func in;
+void FAR *in_desc;
+out_func out;
+void FAR *out_desc;
+{
+    struct inflate_state FAR *state;
+    z_const unsigned char FAR *next;    /* next input */
+    unsigned char FAR *put;     /* next output */
+    unsigned have, left;        /* available input and output */
+    unsigned long hold;         /* bit buffer */
+    unsigned bits;              /* bits in bit buffer */
+    unsigned copy;              /* number of stored or match bytes to copy */
+    unsigned char FAR *from;    /* where to copy match bytes from */
+    code here;                  /* current decoding table entry */
+    code last;                  /* parent table entry */
+    unsigned len;               /* length to copy for repeats, bits to drop */
+    int ret;                    /* return code */
+    static const unsigned short order[19] = /* permutation of code lengths */
+        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+    /* Check that the strm exists and that the state was initialized */
+    if (strm == Z_NULL || strm->state == Z_NULL)
+        return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* Reset the state: block header next, empty bit buffer, empty window */
+    strm->msg = Z_NULL;
+    state->mode = TYPE;
+    state->last = 0;
+    state->whave = 0;
+    next = strm->next_in;
+    have = next != Z_NULL ? strm->avail_in : 0;
+    hold = 0;
+    bits = 0;
+    put = state->window;
+    left = state->wsize;
+
+    /* Inflate until end of block marked as last */
+    for (;;)
+        switch (state->mode) {
+        case TYPE:
+            /* determine and dispatch block type */
+            if (state->last) {
+                BYTEBITS();
+                state->mode = DONE;
+                break;
+            }
+            NEEDBITS(3);
+            state->last = BITS(1);
+            DROPBITS(1);
+            switch (BITS(2)) {
+            case 0:                             /* stored block */
+                Tracev((stderr, "inflate:     stored block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = STORED;
+                break;
+            case 1:                             /* fixed block */
+                fixedtables(state);
+                Tracev((stderr, "inflate:     fixed codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = LEN;              /* decode codes */
+                break;
+            case 2:                             /* dynamic block */
+                Tracev((stderr, "inflate:     dynamic codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = TABLE;
+                break;
+            case 3:                             /* invalid block type */
+                strm->msg = (char *)"invalid block type";
+                state->mode = BAD;
+            }
+            DROPBITS(2);
+            break;
+
+        case STORED:
+            /* get and verify stored block length */
+            BYTEBITS();                         /* go to byte boundary */
+            NEEDBITS(32);
+            if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
+                strm->msg = (char *)"invalid stored block lengths";
+                state->mode = BAD;
+                break;
+            }
+            state->length = (unsigned)hold & 0xffff;
+            Tracev((stderr, "inflate:       stored length %u\n",
+                    state->length));
+            INITBITS();
+
+            /* copy stored block from input to output */
+            while (state->length != 0) {
+                copy = state->length;
+                PULL();
+                ROOM();
+                if (copy > have) copy = have;   /* limit to input on hand */
+                if (copy > left) copy = left;   /* limit to window space */
+                zmemcpy(put, next, copy);
+                have -= copy;
+                next += copy;
+                left -= copy;
+                put += copy;
+                state->length -= copy;
+            }
+            Tracev((stderr, "inflate:       stored end\n"));
+            state->mode = TYPE;
+            break;
+
+        case TABLE:
+            /* get dynamic table entries descriptor */
+            NEEDBITS(14);
+            state->nlen = BITS(5) + 257;
+            DROPBITS(5);
+            state->ndist = BITS(5) + 1;
+            DROPBITS(5);
+            state->ncode = BITS(4) + 4;
+            DROPBITS(4);
+#ifndef PKZIP_BUG_WORKAROUND
+            if (state->nlen > 286 || state->ndist > 30) {
+                strm->msg = (char *)"too many length or distance symbols";
+                state->mode = BAD;
+                break;
+            }
+#endif
+            Tracev((stderr, "inflate:       table sizes ok\n"));
+
+            /* get code length code lengths (not a typo) */
+            state->have = 0;
+            while (state->have < state->ncode) {
+                NEEDBITS(3);
+                state->lens[order[state->have++]] = (unsigned short)BITS(3);
+                DROPBITS(3);
+            }
+            while (state->have < 19)
+                state->lens[order[state->have++]] = 0;
+            state->next = state->codes;
+            state->lencode = (code const FAR *)(state->next);
+            state->lenbits = 7;
+            ret = inflate_table(CODES, state->lens, 19, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid code lengths set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       code lengths ok\n"));
+
+            /* get length and distance code code lengths */
+            state->have = 0;
+            while (state->have < state->nlen + state->ndist) {
+                for (;;) {
+                    here = state->lencode[BITS(state->lenbits)];
+                    if ((unsigned)(here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                if (here.val < 16) {
+                    DROPBITS(here.bits);
+                    state->lens[state->have++] = here.val;
+                }
+                else {
+                    if (here.val == 16) {
+                        NEEDBITS(here.bits + 2);
+                        DROPBITS(here.bits);
+                        if (state->have == 0) {
+                            strm->msg = (char *)"invalid bit length repeat";
+                            state->mode = BAD;
+                            break;
+                        }
+                        len = (unsigned)(state->lens[state->have - 1]);
+                        copy = 3 + BITS(2);
+                        DROPBITS(2);
+                    }
+                    else if (here.val == 17) {
+                        NEEDBITS(here.bits + 3);
+                        DROPBITS(here.bits);
+                        len = 0;
+                        copy = 3 + BITS(3);
+                        DROPBITS(3);
+                    }
+                    else {
+                        NEEDBITS(here.bits + 7);
+                        DROPBITS(here.bits);
+                        len = 0;
+                        copy = 11 + BITS(7);
+                        DROPBITS(7);
+                    }
+                    if (state->have + copy > state->nlen + state->ndist) {
+                        strm->msg = (char *)"invalid bit length repeat";
+                        state->mode = BAD;
+                        break;
+                    }
+                    while (copy--)
+                        state->lens[state->have++] = (unsigned short)len;
+                }
+            }
+
+            /* breaks above only left the while; exit the case on error */
+            if (state->mode == BAD) break;
+
+            /* check for end-of-block code (better have one) */
+            if (state->lens[256] == 0) {
+                strm->msg = (char *)"invalid code -- missing end-of-block";
+                state->mode = BAD;
+                break;
+            }
+
+            /* build code tables -- note: do not change the lenbits or distbits
+               values here (9 and 6) without reading the comments in inftrees.h
+               concerning the ENOUGH constants, which depend on those values */
+            state->next = state->codes;
+            state->lencode = (code const FAR *)(state->next);
+            state->lenbits = 9;
+            ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid literal/lengths set";
+                state->mode = BAD;
+                break;
+            }
+            state->distcode = (code const FAR *)(state->next);
+            state->distbits = 6;
+            ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
+                            &(state->next), &(state->distbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid distances set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       codes ok\n"));
+            state->mode = LEN;
+            /* fallthrough */
+        case LEN:
+            /* use inflate_fast() if we have enough input and output */
+            if (have >= 6 && left >= 258) {
+                RESTORE();
+                if (state->whave < state->wsize)
+                    state->whave = state->wsize - left;
+                inflate_fast(strm, state->wsize);
+                LOAD();
+                break;
+            }
+
+            /* get a literal, length, or end-of-block code */
+            for (;;) {
+                here = state->lencode[BITS(state->lenbits)];
+                if ((unsigned)(here.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if (here.op && (here.op & 0xf0) == 0) {
+                last = here;
+                for (;;) {
+                    here = state->lencode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+            }
+            DROPBITS(here.bits);
+            state->length = (unsigned)here.val;
+
+            /* process literal */
+            if (here.op == 0) {
+                Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
+                        "inflate:         literal '%c'\n" :
+                        "inflate:         literal 0x%02x\n", here.val));
+                ROOM();
+                *put++ = (unsigned char)(state->length);
+                left--;
+                state->mode = LEN;
+                break;
+            }
+
+            /* process end of block */
+            if (here.op & 32) {
+                Tracevv((stderr, "inflate:         end of block\n"));
+                state->mode = TYPE;
+                break;
+            }
+
+            /* invalid code */
+            if (here.op & 64) {
+                strm->msg = (char *)"invalid literal/length code";
+                state->mode = BAD;
+                break;
+            }
+
+            /* length code -- get extra bits, if any */
+            state->extra = (unsigned)(here.op) & 15;
+            if (state->extra != 0) {
+                NEEDBITS(state->extra);
+                state->length += BITS(state->extra);
+                DROPBITS(state->extra);
+            }
+            Tracevv((stderr, "inflate:         length %u\n", state->length));
+
+            /* get distance code */
+            for (;;) {
+                here = state->distcode[BITS(state->distbits)];
+                if ((unsigned)(here.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if ((here.op & 0xf0) == 0) {
+                last = here;
+                for (;;) {
+                    here = state->distcode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+            }
+            DROPBITS(here.bits);
+            if (here.op & 64) {
+                strm->msg = (char *)"invalid distance code";
+                state->mode = BAD;
+                break;
+            }
+            state->offset = (unsigned)here.val;
+
+            /* get distance extra bits, if any */
+            state->extra = (unsigned)(here.op) & 15;
+            if (state->extra != 0) {
+                NEEDBITS(state->extra);
+                state->offset += BITS(state->extra);
+                DROPBITS(state->extra);
+            }
+            if (state->offset > state->wsize - (state->whave < state->wsize ?
+                                                left : 0)) {
+                strm->msg = (char *)"invalid distance too far back";
+                state->mode = BAD;
+                break;
+            }
+            Tracevv((stderr, "inflate:         distance %u\n", state->offset));
+
+            /* copy match from window to output */
+            do {
+                ROOM();
+                copy = state->wsize - state->offset;
+                if (copy < left) {
+                    from = put + copy;
+                    copy = left - copy;
+                }
+                else {
+                    from = put - state->offset;
+                    copy = left;
+                }
+                if (copy > state->length) copy = state->length;
+                state->length -= copy;
+                left -= copy;
+                do {
+                    *put++ = *from++;
+                } while (--copy);
+            } while (state->length != 0);
+            break;
+
+        case DONE:
+            /* inflate stream terminated properly -- write leftover output */
+            ret = Z_STREAM_END;
+            if (left < state->wsize) {
+                if (out(out_desc, state->window, state->wsize - left))
+                    ret = Z_BUF_ERROR;
+            }
+            goto inf_leave;
+
+        case BAD:
+            ret = Z_DATA_ERROR;
+            goto inf_leave;
+
+        default:                /* can't happen, but makes compilers happy */
+            ret = Z_STREAM_ERROR;
+            goto inf_leave;
+        }
+
+    /* Return unused input */
+  inf_leave:
+    strm->next_in = next;
+    strm->avail_in = have;
+    return ret;
+}
+
+int ZEXPORT inflateBackEnd(strm)
+z_streamp strm;                 /* stream whose state is released */
+{
+    if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0)
+        return Z_STREAM_ERROR;  /* no state to free, or no way to free it */
+    ZFREE(strm, strm->state);
+    strm->state = Z_NULL;       /* a repeated call now gets Z_STREAM_ERROR */
+    Tracev((stderr, "inflate: end\n"));
+    return Z_OK;
+}
diff --git a/libraries/zlib/inffast.c b/libraries/zlib/inffast.c
new file mode 100644
index 000000000..0dbd1dbc0
--- /dev/null
+++ b/libraries/zlib/inffast.c
@@ -0,0 +1,323 @@
+/* inffast.c -- fast decoding
+ * Copyright (C) 1995-2017 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+#ifdef ASMINF
+#  pragma message("Assembler code may have bugs -- use at your own risk")
+#else
+
+/*
+   Decode literal, length, and distance codes and write out the resulting
+   literal and match bytes until either not enough input or output is
+   available, an end-of-block is encountered, or a data error is encountered.
+   When large enough input and output buffers are supplied to inflate(), for
+   example, a 16K input buffer and a 64K output buffer, more than 95% of the
+   inflate execution time is spent in this routine.
+
+   Entry assumptions:
+
+        state->mode == LEN
+        strm->avail_in >= 6
+        strm->avail_out >= 258
+        start >= strm->avail_out
+        state->bits < 8
+
+   On return, state->mode is one of:
+
+        LEN -- ran out of enough output space or enough available input
+        TYPE -- reached end of block code, inflate() to interpret next block
+        BAD -- error in block data
+
+   Notes:
+
+    - The maximum input bits used by a length/distance pair is 15 bits for the
+      length code, 5 bits for the length extra, 15 bits for the distance code,
+      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
+      Therefore if strm->avail_in >= 6, then there is enough input to avoid
+      checking for available input while decoding.
+
+    - The maximum bytes that a single length/distance pair can output is 258
+      bytes, which is the maximum length that can be coded.  inflate_fast()
+      requires strm->avail_out >= 258 for each loop to avoid checking for
+      output space.
+ */
+void ZLIB_INTERNAL inflate_fast(strm, start)
+z_streamp strm;
+unsigned start;         /* inflate()'s starting value for strm->avail_out */
+{
+    struct inflate_state FAR *state;
+    z_const unsigned char FAR *in;      /* local strm->next_in */
+    z_const unsigned char FAR *last;    /* have enough input while in < last */
+    unsigned char FAR *out;     /* local strm->next_out */
+    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
+    unsigned char FAR *end;     /* while out < end, enough space available */
+#ifdef INFLATE_STRICT
+    unsigned dmax;              /* maximum distance from zlib header */
+#endif
+    unsigned wsize;             /* window size or zero if not using window */
+    unsigned whave;             /* valid bytes in the window */
+    unsigned wnext;             /* window write index */
+    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
+    unsigned long hold;         /* local strm->hold */
+    unsigned bits;              /* local strm->bits */
+    code const FAR *lcode;      /* local strm->lencode */
+    code const FAR *dcode;      /* local strm->distcode */
+    unsigned lmask;             /* mask for first level of length codes */
+    unsigned dmask;             /* mask for first level of distance codes */
+    code here;                  /* retrieved table entry */
+    unsigned op;                /* code bits, operation, extra bits, or */
+                                /*  window position, window bytes to copy */
+    unsigned len;               /* match length, unused bytes */
+    unsigned dist;              /* match distance */
+    unsigned char FAR *from;    /* where to copy match from */
+
+    /* copy state to local variables */
+    state = (struct inflate_state FAR *)strm->state;
+    in = strm->next_in;
+    last = in + (strm->avail_in - 5);
+    out = strm->next_out;
+    beg = out - (start - strm->avail_out);
+    end = out + (strm->avail_out - 257);
+#ifdef INFLATE_STRICT
+    dmax = state->dmax;
+#endif
+    wsize = state->wsize;
+    whave = state->whave;
+    wnext = state->wnext;
+    window = state->window;
+    hold = state->hold;
+    bits = state->bits;
+    lcode = state->lencode;
+    dcode = state->distcode;
+    lmask = (1U << state->lenbits) - 1;
+    dmask = (1U << state->distbits) - 1;
+
+    /* decode literals and length/distances until end-of-block or not enough
+       input data or output space */
+    do {
+        if (bits < 15) {                /* longest length code is 15 bits */
+            hold += (unsigned long)(*in++) << bits;
+            bits += 8;
+            hold += (unsigned long)(*in++) << bits;
+            bits += 8;
+        }
+        here = lcode[hold & lmask];
+      dolen:
+        op = (unsigned)(here.bits);
+        hold >>= op;
+        bits -= op;
+        op = (unsigned)(here.op);
+        if (op == 0) {                          /* literal */
+            Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
+                    "inflate:         literal '%c'\n" :
+                    "inflate:         literal 0x%02x\n", here.val));
+            *out++ = (unsigned char)(here.val);
+        }
+        else if (op & 16) {                     /* length base */
+            len = (unsigned)(here.val);
+            op &= 15;                           /* number of extra bits */
+            if (op) {
+                if (bits < op) {        /* at most 5 extra bits: one byte */
+                    hold += (unsigned long)(*in++) << bits;
+                    bits += 8;
+                }
+                len += (unsigned)hold & ((1U << op) - 1);
+                hold >>= op;
+                bits -= op;
+            }
+            Tracevv((stderr, "inflate:         length %u\n", len));
+            if (bits < 15) {            /* longest distance code is 15 bits */
+                hold += (unsigned long)(*in++) << bits;
+                bits += 8;
+                hold += (unsigned long)(*in++) << bits;
+                bits += 8;
+            }
+            here = dcode[hold & dmask];
+          dodist:
+            op = (unsigned)(here.bits);
+            hold >>= op;
+            bits -= op;
+            op = (unsigned)(here.op);
+            if (op & 16) {                      /* distance base */
+                dist = (unsigned)(here.val);
+                op &= 15;                       /* number of extra bits */
+                if (bits < op) {        /* up to 13 extra bits: maybe two bytes */
+                    hold += (unsigned long)(*in++) << bits;
+                    bits += 8;
+                    if (bits < op) {
+                        hold += (unsigned long)(*in++) << bits;
+                        bits += 8;
+                    }
+                }
+                dist += (unsigned)hold & ((1U << op) - 1);
+#ifdef INFLATE_STRICT
+                if (dist > dmax) {
+                    strm->msg = (char *)"invalid distance too far back";
+                    state->mode = BAD;
+                    break;
+                }
+#endif
+                hold >>= op;
+                bits -= op;
+                Tracevv((stderr, "inflate:         distance %u\n", dist));
+                op = (unsigned)(out - beg);     /* max distance in output */
+                if (dist > op) {                /* see if copy from window */
+                    op = dist - op;             /* distance back in window */
+                    if (op > whave) {
+                        if (state->sane) {
+                            strm->msg =
+                                (char *)"invalid distance too far back";
+                            state->mode = BAD;
+                            break;
+                        }
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+                        if (len <= op - whave) {
+                            do {
+                                *out++ = 0;
+                            } while (--len);
+                            continue;
+                        }
+                        len -= op - whave;
+                        do {
+                            *out++ = 0;
+                        } while (--op > whave);
+                        if (op == 0) {
+                            from = out - dist;
+                            do {
+                                *out++ = *from++;
+                            } while (--len);
+                            continue;
+                        }
+#endif
+                    }
+                    from = window;
+                    if (wnext == 0) {           /* very common case */
+                        from += wsize - op;
+                        if (op < len) {         /* some from window */
+                            len -= op;
+                            do {
+                                *out++ = *from++;
+                            } while (--op);
+                            from = out - dist;  /* rest from output */
+                        }
+                    }
+                    else if (wnext < op) {      /* wrap around window */
+                        from += wsize + wnext - op;
+                        op -= wnext;
+                        if (op < len) {         /* some from end of window */
+                            len -= op;
+                            do {
+                                *out++ = *from++;
+                            } while (--op);
+                            from = window;
+                            if (wnext < len) {  /* some from start of window */
+                                op = wnext;
+                                len -= op;
+                                do {
+                                    *out++ = *from++;
+                                } while (--op);
+                                from = out - dist;      /* rest from output */
+                            }
+                        }
+                    }
+                    else {                      /* contiguous in window */
+                        from += wnext - op;
+                        if (op < len) {         /* some from window */
+                            len -= op;
+                            do {
+                                *out++ = *from++;
+                            } while (--op);
+                            from = out - dist;  /* rest from output */
+                        }
+                    }
+                    while (len > 2) {
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        len -= 3;
+                    }
+                    if (len) {
+                        *out++ = *from++;
+                        if (len > 1)
+                            *out++ = *from++;
+                    }
+                }
+                else {
+                    from = out - dist;          /* copy direct from output */
+                    do {                        /* minimum length is three */
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        len -= 3;
+                    } while (len > 2);
+                    if (len) {
+                        *out++ = *from++;
+                        if (len > 1)
+                            *out++ = *from++;
+                    }
+                }
+            }
+            else if ((op & 64) == 0) {          /* 2nd level distance code */
+                here = dcode[here.val + (hold & ((1U << op) - 1))];
+                goto dodist;
+            }
+            else {
+                strm->msg = (char *)"invalid distance code";
+                state->mode = BAD;
+                break;
+            }
+        }
+        else if ((op & 64) == 0) {              /* 2nd level length code */
+            here = lcode[here.val + (hold & ((1U << op) - 1))];
+            goto dolen;
+        }
+        else if (op & 32) {                     /* end-of-block */
+            Tracevv((stderr, "inflate:         end of block\n"));
+            state->mode = TYPE;
+            break;
+        }
+        else {
+            strm->msg = (char *)"invalid literal/length code";
+            state->mode = BAD;
+            break;
+        }
+    } while (in < last && out < end);
+
+    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
+    len = bits >> 3;
+    in -= len;
+    bits -= len << 3;
+    hold &= (1U << bits) - 1;
+
+    /* update state and return */
+    strm->next_in = in;
+    strm->next_out = out;
+    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
+    strm->avail_out = (unsigned)(out < end ?
+                                 257 + (end - out) : 257 - (out - end));
+    state->hold = hold;
+    state->bits = bits;
+    return;
+}
+
+/*
+   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
+   - Using bit fields for code structure
+   - Different op definition to avoid & for extra bits (do & for table bits)
+   - Three separate decoding do-loops for direct, window, and wnext == 0
+   - Special case for distance > 1 copies to do overlapped load and store copy
+   - Explicit branch predictions (based on measured branch probabilities)
+   - Deferring match copy and interspersing it with decoding subsequent codes
+   - Swapping literal/length else
+   - Swapping window/direct else
+   - Larger unrolled copy loops (three is about right)
+   - Moving len -= 3 statement into middle of loop
+ */
+
+#endif /* !ASMINF */
diff --git a/libraries/zlib/inffast.h b/libraries/zlib/inffast.h
new file mode 100644
index 000000000..e5c1aa4ca
--- /dev/null
+++ b/libraries/zlib/inffast.h
@@ -0,0 +1,11 @@
+/* inffast.h -- header to use inffast.c
+ * Copyright (C) 1995-2003, 2010 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start));
diff --git a/libraries/zlib/inffixed.h b/libraries/zlib/inffixed.h
new file mode 100644
index 000000000..d62832776
--- /dev/null
+++ b/libraries/zlib/inffixed.h
@@ -0,0 +1,94 @@
+    /* inffixed.h -- table for decoding fixed codes
+     * Generated automatically by makefixed().
+     */
+
+    /* WARNING: this file should *not* be used by applications.
+       It is part of the implementation of this library and is
+       subject to change. Applications should only use zlib.h.
+     */
+
+    static const code lenfix[512] = {
+        {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48},
+        {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128},
+        {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59},
+        {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176},
+        {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20},
+        {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100},
+        {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8},
+        {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216},
+        {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76},
+        {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114},
+        {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2},
+        {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148},
+        {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42},
+        {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86},
+        {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15},
+        {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236},
+        {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62},
+        {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142},
+        {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31},
+        {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162},
+        {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25},
+        {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105},
+        {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4},
+        {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202},
+        {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69},
+        {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125},
+        {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13},
+        {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195},
+        {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35},
+        {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91},
+        {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19},
+        {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246},
+        {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55},
+        {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135},
+        {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99},
+        {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190},
+        {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16},
+        {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96},
+        {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6},
+        {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209},
+        {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72},
+        {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116},
+        {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4},
+        {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153},
+        {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44},
+        {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82},
+        {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11},
+        {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229},
+        {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58},
+        {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138},
+        {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51},
+        {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173},
+        {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30},
+        {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110},
+        {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0},
+        {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195},
+        {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65},
+        {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121},
+        {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9},
+        {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258},
+        {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37},
+        {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93},
+        {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23},
+        {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251},
+        {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51},
+        {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131},
+        {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67},
+        {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183},
+        {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23},
+        {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103},
+        {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9},
+        {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223},
+        {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79},
+        {0,9,255}
+    };
+
+    static const code distfix[32] = {
+        {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025},
+        {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193},
+        {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385},
+        {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577},
+        {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073},
+        {22,5,193},{64,5,0}
+    };
diff --git a/libraries/zlib/inflate.c b/libraries/zlib/inflate.c
new file mode 100644
index 000000000..ac333e8c2
--- /dev/null
+++ b/libraries/zlib/inflate.c
@@ -0,0 +1,1561 @@
+/* inflate.c -- zlib decompression
+ * Copyright (C) 1995-2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ * Change history:
+ *
+ * 1.2.beta0    24 Nov 2002
+ * - First version -- complete rewrite of inflate to simplify code, avoid
+ *   creation of window when not needed, minimize use of window when it is
+ *   needed, make inffast.c even faster, implement gzip decoding, and to
+ *   improve code readability and style over the previous zlib inflate code
+ *
+ * 1.2.beta1    25 Nov 2002
+ * - Use pointers for available input and output checking in inffast.c
+ * - Remove input and output counters in inffast.c
+ * - Change inffast.c entry and loop from avail_in >= 7 to >= 6
+ * - Remove unnecessary second byte pull from length extra in inffast.c
+ * - Unroll direct copy to three copies per loop in inffast.c
+ *
+ * 1.2.beta2    4 Dec 2002
+ * - Change external routine names to reduce potential conflicts
+ * - Correct filename to inffixed.h for fixed tables in inflate.c
+ * - Make hbuf[] unsigned char to match parameter type in inflate.c
+ * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset)
+ *   to avoid negation problem on Alphas (64 bit) in inflate.c
+ *
+ * 1.2.beta3    22 Dec 2002
+ * - Add comments on state->bits assertion in inffast.c
+ * - Add comments on op field in inftrees.h
+ * - Fix bug in reuse of allocated window after inflateReset()
+ * - Remove bit fields--back to byte structure for speed
+ * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths
+ * - Change post-increments to pre-increments in inflate_fast(), PPC biased?
+ * - Add compile time option, POSTINC, to use post-increments instead (Intel?)
+ * - Make MATCH copy in inflate() much faster for when inflate_fast() not used
+ * - Use local copies of stream next and avail values, as well as local bit
+ *   buffer and bit count in inflate()--for speed when inflate_fast() not used
+ *
+ * 1.2.beta4    1 Jan 2003
+ * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings
+ * - Move a comment on output buffer sizes from inffast.c to inflate.c
+ * - Add comments in inffast.c to introduce the inflate_fast() routine
+ * - Rearrange window copies in inflate_fast() for speed and simplification
+ * - Unroll last copy for window match in inflate_fast()
+ * - Use local copies of window variables in inflate_fast() for speed
+ * - Pull out common wnext == 0 case for speed in inflate_fast()
+ * - Make op and len in inflate_fast() unsigned for consistency
+ * - Add FAR to lcode and dcode declarations in inflate_fast()
+ * - Simplified bad distance check in inflate_fast()
+ * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new
+ *   source file infback.c to provide a call-back interface to inflate for
+ *   programs like gzip and unzip -- uses window as output buffer to avoid
+ *   window copying
+ *
+ * 1.2.beta5    1 Jan 2003
+ * - Improved inflateBack() interface to allow the caller to provide initial
+ *   input in strm.
+ * - Fixed stored blocks bug in inflateBack()
+ *
+ * 1.2.beta6    4 Jan 2003
+ * - Added comments in inffast.c on effectiveness of POSTINC
+ * - Typecasting all around to reduce compiler warnings
+ * - Changed loops from while (1) or do {} while (1) to for (;;), again to
+ *   make compilers happy
+ * - Changed type of window in inflateBackInit() to unsigned char *
+ *
+ * 1.2.beta7    27 Jan 2003
+ * - Changed many types to unsigned or unsigned short to avoid warnings
+ * - Added inflateCopy() function
+ *
+ * 1.2.0        9 Mar 2003
+ * - Changed inflateBack() interface to provide separate opaque descriptors
+ *   for the in() and out() functions
+ * - Changed inflateBack() argument and in_func typedef to swap the length
+ *   and buffer address return values for the input function
+ * - Check next_in and next_out for Z_NULL on entry to inflate()
+ *
+ * History for versions after 1.2.0 is in ChangeLog in the zlib distribution.
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+#ifdef MAKEFIXED
+#  ifndef BUILDFIXED
+#    define BUILDFIXED
+#  endif
+#endif
+
+/* function prototypes */
+local int inflateStateCheck OF((z_streamp strm));
+local void fixedtables OF((struct inflate_state FAR *state));
+local int updatewindow OF((z_streamp strm, const unsigned char FAR *end,
+                           unsigned copy));
+#ifdef BUILDFIXED
+   void makefixed OF((void));
+#endif
+local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf,
+                              unsigned len));
+
+local int inflateStateCheck(strm)   /* 1 if strm or its state is unusable */
+z_streamp strm;
+{
+    struct inflate_state FAR *st;
+
+    /* the stream itself and both allocator hooks must be present */
+    if (strm == Z_NULL) return 1;
+    if (strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) return 1;
+    /* the state must exist, point back at strm, and hold a mode in range */
+    st = (struct inflate_state FAR *)strm->state;
+    if (st == Z_NULL || st->strm != strm) return 1;
+    return (st->mode < HEAD || st->mode > SYNC) ? 1 : 0;
+}
+
+int ZEXPORT inflateResetKeep(strm)  /* restart decoding; window untouched */
+z_streamp strm;
+{
+    struct inflate_state FAR *st;
+
+    if (inflateStateCheck(strm) != 0) return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+
+    /* per-stream bookkeeping: byte totals, message, and (if wrapped) adler */
+    st->total = 0;
+    strm->total_out = 0;
+    strm->total_in = 0;
+    strm->msg = Z_NULL;
+    if (st->wrap != 0)      /* to support ill-conceived Java test suite */
+        strm->adler = st->wrap & 1;
+
+    /* decoder restarts at the header with an empty bit accumulator */
+    st->mode = HEAD;        st->last = 0;       st->havedict = 0;
+    st->hold = 0;           st->bits = 0;       st->dmax = 32768U;
+    st->sane = 1;           st->back = -1;      st->head = Z_NULL;
+    st->lencode = st->distcode = st->next = st->codes;
+    Tracev((stderr, "inflate: reset\n"));
+    return Z_OK;
+}
+
+int ZEXPORT inflateReset(strm)      /* full reset: window marked unused too */
+z_streamp strm;
+{
+    struct inflate_state FAR *st;
+
+    if (inflateStateCheck(strm) != 0)
+        return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+    /* zero the window bookkeeping, then reset everything else */
+    st->wnext = st->whave = st->wsize = 0;
+    return inflateResetKeep(strm);
+}
+
+int ZEXPORT inflateReset2(strm, windowBits) /* reset with new window bits */
+z_streamp strm;
+int windowBits;     /* sign and bits above the low four select the wrap */
+{
+    int wrap;
+    struct inflate_state FAR *state;
+
+    /* get the state (any invalid argument yields Z_STREAM_ERROR) */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* extract wrap request from windowBits parameter; values below -15 are
+       rejected before negating so that -INT_MIN can never be evaluated */
+    if (windowBits < 0) {
+        if (windowBits < -15) return Z_STREAM_ERROR;
+        wrap = 0;
+        windowBits = -windowBits;
+    }
+    else {
+        wrap = (windowBits >> 4) + 5;
+#ifdef GUNZIP
+        if (windowBits < 48)
+            windowBits &= 15;
+#endif
+    }
+    /* set number of window bits (0 or 8..15), free window if different */
+    if (windowBits && (windowBits < 8 || windowBits > 15))
+        return Z_STREAM_ERROR;
+    if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) {
+        ZFREE(strm, state->window);
+        state->window = Z_NULL;
+    }
+    /* update state and reset the rest of it */
+    state->wrap = wrap;
+    state->wbits = (unsigned)windowBits;
+    return inflateReset(strm);
+}
+
+int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
+z_streamp strm;
+int windowBits;
+const char *version;
+int stream_size;
+{
+    struct inflate_state FAR *st;
+    int status;
+
+    /* caller's version (first character) and z_stream size must match ours */
+    if (version == Z_NULL || version[0] != ZLIB_VERSION[0])
+        return Z_VERSION_ERROR;
+    if (stream_size != (int)(sizeof(z_stream))) return Z_VERSION_ERROR;
+    if (strm == Z_NULL) return Z_STREAM_ERROR;
+    strm->msg = Z_NULL;                 /* in case we return an error */
+
+    /* missing allocator hooks: an error for Z_SOLO, else use the defaults */
+#ifdef Z_SOLO
+    if (strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
+        return Z_STREAM_ERROR;
+#else
+    if (strm->zalloc == (alloc_func)0) {
+        strm->zalloc = zcalloc;
+        strm->opaque = (voidpf)0;
+    }
+    if (strm->zfree == (free_func)0) strm->zfree = zcfree;
+#endif
+
+    /* allocate the state and tie it to the stream */
+    st = (struct inflate_state FAR *)
+         ZALLOC(strm, 1, sizeof(struct inflate_state));
+    if (st == Z_NULL) return Z_MEM_ERROR;
+    Tracev((stderr, "inflate: allocated\n"));
+    strm->state = (struct internal_state FAR *)st;
+    st->strm = strm;
+    st->window = Z_NULL;
+    st->mode = HEAD;        /* to pass state test in inflateReset2() */
+    status = inflateReset2(strm, windowBits);
+    if (status == Z_OK) return Z_OK;
+    ZFREE(strm, st);        /* reset failed: undo the allocation */
+    strm->state = Z_NULL;
+    return status;
+}
+
+int ZEXPORT inflateInit_(strm, version, stream_size)
+z_streamp strm;
+const char *version;
+int stream_size;
+{   /* delegates to inflateInit2_() with windowBits fixed to DEF_WBITS */
+    return inflateInit2_(strm, DEF_WBITS, version, stream_size);
+}
+
+int ZEXPORT inflatePrime(strm, bits, value)  /* insert bits into bit buffer */
+z_streamp strm;
+int bits;           /* negative: empty the buffer; else bit count, at most 16 */
+int value;          /* its low `bits` bits are the ones inserted */
+{
+    struct inflate_state FAR *state;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if (bits < 0) {
+        state->hold = 0;
+        state->bits = 0;
+        return Z_OK;
+    }
+    if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR;
+    if (bits == 0) return Z_OK;     /* nothing to add; avoids a shift by 32 */
+    value &= (1L << bits) - 1;
+    state->hold += (unsigned long)value << state->bits; /* widened first */
+    state->bits += (uInt)bits;
+    return Z_OK;
+}
+
+/*
+   Return state with length and distance decoding tables and index sizes set to
+   fixed code decoding.  Normally this returns fixed tables from inffixed.h.
+   If BUILDFIXED is defined, then instead this routine builds the tables the
+   first time it's called, and returns those tables the first time and
+   thereafter.  This reduces the size of the code by about 2K bytes, in
+   exchange for a little execution time.  However, BUILDFIXED should not be
+   used for threaded applications, since the rewriting of the tables and virgin
+   may not be thread-safe.
+ */
+local void fixedtables(state)
+struct inflate_state FAR *state;
+{
+#ifdef BUILDFIXED
+    static int virgin = 1;          /* nonzero until the tables are built */
+    static code *lenfix, *distfix;
+    static code fixed[544];         /* 512 length + 32 distance entries */
+
+    /* build fixed huffman tables if first call (may not be thread safe) */
+    if (virgin) {
+        unsigned sym, bits;
+        static code *next;
+
+        /* literal/length table: code lengths 8, 9, 7, 8 over symbols 0..287 */
+        sym = 0;
+        while (sym < 144) state->lens[sym++] = 8;
+        while (sym < 256) state->lens[sym++] = 9;
+        while (sym < 280) state->lens[sym++] = 7;
+        while (sym < 288) state->lens[sym++] = 8;
+        next = fixed;
+        lenfix = next;
+        bits = 9;
+        inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
+
+        /* distance table: all 32 symbols use 5-bit codes; it starts wherever
+           the inflate_table() call above left next */
+        while (sym < 32) state->lens[sym++] = 5;
+        distfix = next;
+        bits = 5;
+        inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
+
+        /* do this just once */
+        virgin = 0;
+    }
+#else /* !BUILDFIXED */
+#   include "inffixed.h"
+#endif /* BUILDFIXED */
+    state->lencode = lenfix;
+    state->lenbits = 9;             /* lenfix has 1 << 9 entries */
+    state->distcode = distfix;
+    state->distbits = 5;            /* distfix has 1 << 5 entries */
+}
+
+#ifdef MAKEFIXED
+#include <stdio.h>
+
+/*
+   Write out the inffixed.h that is #include'd above.  Defining MAKEFIXED also
+   defines BUILDFIXED, so the tables are built on the fly.  makefixed() writes
+   those tables to stdout, which would be piped to inffixed.h.  A small program
+   can simply call makefixed to do this:
+
+    void makefixed(void);
+
+    int main(void)
+    {
+        makefixed();
+        return 0;
+    }
+
+   Then that can be linked with zlib built with MAKEFIXED defined and run:
+
+    a.out > inffixed.h
+ */
+void makefixed()
+{
+    unsigned low, size;             /* current table index, table length */
+    struct inflate_state state;     /* scratch state for fixedtables() */
+
+    fixedtables(&state);            /* sets state.lencode and state.distcode */
+    puts("    /* inffixed.h -- table for decoding fixed codes");
+    puts("     * Generated automatically by makefixed().");
+    puts("     */");
+    puts("");
+    puts("    /* WARNING: this file should *not* be used by applications.");
+    puts("       It is part of the implementation of this library and is");
+    puts("       subject to change. Applications should only use zlib.h.");
+    puts("     */");
+    puts("");
+    size = 1U << 9;                 /* 512 entries, matching lenbits of 9 */
+    printf("    static const code lenfix[%u] = {", size);
+    low = 0;
+    for (;;) {                      /* op is written as 64 if low % 128 == 99 */
+        if ((low % 7) == 0) printf("\n        ");   /* 7 entries per line */
+        printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op,
+               state.lencode[low].bits, state.lencode[low].val);
+        if (++low == size) break;   /* no comma after the last entry */
+        putchar(',');
+    }
+    puts("\n    };");
+    size = 1U << 5;                 /* 32 entries, matching distbits of 5 */
+    printf("\n    static const code distfix[%u] = {", size);
+    low = 0;
+    for (;;) {
+        if ((low % 6) == 0) printf("\n        ");   /* 6 entries per line */
+        printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits,
+               state.distcode[low].val);
+        if (++low == size) break;   /* no comma after the last entry */
+        putchar(',');
+    }
+    puts("\n    };");
+}
+#endif /* MAKEFIXED */
+
+/*
+   Update the window with the last wsize (normally 32K) bytes written before
+   returning.  If window does not exist yet, create it.  This is only called
+   when a window is already in use, or when output has been written during this
+   inflate call, but the end of the deflate stream has not been reached yet.
+   It is also called to create a window for dictionary data when a dictionary
+   is loaded.
+
+   Providing output buffers larger than 32K to inflate() should provide a speed
+   advantage, since only the last 32K of output is copied to the sliding window
+   upon return from inflate(), and since all distances after the first 32K of
+   output will fall in the output data, making match copies simpler and faster.
+   The advantage may be dependent on the size of the processor's data caches.
+ */
+local int updatewindow(strm, end, copy)
+z_streamp strm;
+const Bytef *end;       /* just past the output bytes to be saved */
+unsigned copy;          /* how many bytes before end are candidates */
+{
+    struct inflate_state FAR *st = (struct inflate_state FAR *)strm->state;
+    unsigned tail;      /* bytes placed between wnext and the window's end */
+
+    /* create the window on first use; return 1 if that allocation fails */
+    if (st->window == Z_NULL) {
+        st->window = (unsigned char FAR *)
+                     ZALLOC(strm, 1U << st->wbits, sizeof(unsigned char));
+        if (st->window == Z_NULL)
+            return 1;
+    }
+
+    /* a zero wsize means the window is not in use yet: start it empty */
+    if (st->wsize == 0) {
+        st->wsize = 1U << st->wbits;
+        st->whave = st->wnext = 0;
+    }
+
+    /* at least a full window of new output: keep only the last wsize bytes */
+    if (copy >= st->wsize) {
+        zmemcpy(st->window, end - st->wsize, st->wsize);
+        st->wnext = 0;
+        st->whave = st->wsize;
+        return 0;
+    }
+
+    /* otherwise write at wnext, as much as fits before the window's end ... */
+    tail = st->wsize - st->wnext;
+    if (tail > copy) tail = copy;
+    zmemcpy(st->window + st->wnext, end - copy, tail);
+    copy -= tail;
+    if (copy != 0) {
+        /* ... and wrap the remainder around to the front of the window */
+        zmemcpy(st->window, end - copy, copy);
+        st->wnext = copy;
+        st->whave = st->wsize;
+        return 0;
+    }
+
+    /* no wrap: advance wnext (wrapping at wsize) and count the new bytes */
+    st->wnext += tail;
+    if (st->wnext == st->wsize) st->wnext = 0;
+    if (st->whave < st->wsize) st->whave += tail;
+    return 0;
+}
+
+/* Macros for inflate(): */
+
+/* check update: crc32() if state->flags is nonzero (gzip), else adler32() */
+#ifdef GUNZIP
+#  define UPDATE(check, buf, len) \
+    (state->flags ? crc32(check, buf, len) : adler32(check, buf, len))
+#else
+#  define UPDATE(check, buf, len) adler32(check, buf, len)
+#endif
+
+/* CRC2: update check with crc32() of the low two bytes of word, LSB first */
+#ifdef GUNZIP
+#  define CRC2(check, word) \
+    do { \
+        hbuf[0] = (unsigned char)(word); \
+        hbuf[1] = (unsigned char)((word) >> 8); \
+        check = crc32(check, hbuf, 2); \
+    } while (0)
+/* CRC4: same as CRC2, but over the low four bytes of word (both use hbuf[]) */
+#  define CRC4(check, word) \
+    do { \
+        hbuf[0] = (unsigned char)(word); \
+        hbuf[1] = (unsigned char)((word) >> 8); \
+        hbuf[2] = (unsigned char)((word) >> 16); \
+        hbuf[3] = (unsigned char)((word) >> 24); \
+        check = crc32(check, hbuf, 4); \
+    } while (0)
+#endif
+
+/* Load inflate()'s locals from strm (in/out position) and state (bit buffer) */
+#define LOAD() \
+    do { \
+        put = strm->next_out; \
+        left = strm->avail_out; \
+        next = strm->next_in; \
+        have = strm->avail_in; \
+        hold = state->hold; \
+        bits = state->bits; \
+    } while (0)
+
+/* Write inflate()'s locals back to strm and state: the inverse of LOAD() */
+#define RESTORE() \
+    do { \
+        strm->next_out = put; \
+        strm->avail_out = left; \
+        strm->next_in = next; \
+        strm->avail_in = have; \
+        state->hold = hold; \
+        state->bits = bits; \
+    } while (0)
+
+/* Empty the input bit accumulator: no bits held, bit count zero */
+#define INITBITS() \
+    do { \
+        hold = 0; \
+        bits = 0; \
+    } while (0)
+
+/* Append the next input byte above the bits already held in the accumulator;
+   with no input left (have == 0), jump to inf_leave to return from inflate() */
+#define PULLBYTE() \
+    do { \
+        if (have == 0) goto inf_leave; \
+        have--; \
+        hold += (unsigned long)(*next++) << bits; \
+        bits += 8; \
+    } while (0)
+
+/* Pull bytes until the accumulator holds at least n bits.  Like PULLBYTE(),
+   this leaves inflate() through inf_leave if the input runs out first. */
+#define NEEDBITS(n) \
+    do { \
+        while (bits < (unsigned)(n)) \
+            PULLBYTE(); \
+    } while (0)
+
+/* Return the low n bits of the bit accumulator, leaving them there (n < 16) */
+#define BITS(n) \
+    ((unsigned)hold & ((1U << (n)) - 1))
+
+/* Discard the low n bits of the bit accumulator */
+#define DROPBITS(n) \
+    do { \
+        hold >>= (n); \
+        bits -= (unsigned)(n); \
+    } while (0)
+
+/* Drop bits & 7 (zero to seven) bits so the accumulator is byte aligned */
+#define BYTEBITS() \
+    do { \
+        hold >>= bits & 7; \
+        bits -= bits & 7; \
+    } while (0)
+
+/*
+   inflate() uses a state machine to process as much input data and generate as
+   much output data as possible before returning.  The state machine is
+   structured roughly as follows:
+
+    for (;;) switch (state) {
+    ...
+    case STATEn:
+        if (not enough input data or output space to make progress)
+            return;
+        ... make progress ...
+        state = STATEm;
+        break;
+    ...
+    }
+
+   so when inflate() is called again, the same case is attempted again, and
+   if the appropriate resources are provided, the machine proceeds to the
+   next state.  The NEEDBITS() macro is usually the way the state evaluates
+   whether it can proceed or should return.  NEEDBITS() does the return if
+   the requested bits are not available.  The typical use of the BITS macros
+   is:
+
+        NEEDBITS(n);
+        ... do something with BITS(n) ...
+        DROPBITS(n);
+
+   where NEEDBITS(n) either returns from inflate() if there isn't enough
+   input left to load n bits into the accumulator, or it continues.  BITS(n)
+   gives the low n bits in the accumulator.  When done, DROPBITS(n) drops
+   the low n bits off the accumulator.  INITBITS() clears the accumulator
+   and sets the number of available bits to zero.  BYTEBITS() discards just
+   enough bits to put the accumulator on a byte boundary.  After BYTEBITS()
+   and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
+
+   NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
+   if there is no input available.  The decoding of variable length codes uses
+   PULLBYTE() directly in order to pull just enough bytes to decode the next
+   code, and no more.
+
+   Some states loop until they get enough input, making sure that enough
+   state information is maintained to continue the loop where it left off
+   if NEEDBITS() returns in the loop.  For example, want, need, and keep
+   would all have to actually be part of the saved state in case NEEDBITS()
+   returns:
+
+    case STATEw:
+        while (want < need) {
+            NEEDBITS(n);
+            keep[want++] = BITS(n);
+            DROPBITS(n);
+        }
+        state = STATEx;
+    case STATEx:
+
+   As shown above, if the next state is also the next case, then the break
+   is omitted.
+
+   A state may also return if there is not enough output space available to
+   complete that state.  Those states are copying stored data, writing a
+   literal byte, and copying a matching string.
+
+   When returning, a "goto inf_leave" is used to update the total counters,
+   update the check value, and determine whether any progress has been made
+   during that inflate() call in order to return the proper return code.
+   Progress is defined as a change in either strm->avail_in or strm->avail_out.
+   When there is a window, goto inf_leave will update the window with the last
+   output written.  If a goto inf_leave occurs in the middle of decompression
+   and there is no window currently, goto inf_leave will create one and copy
+   output to the window for the next call of inflate().
+
+   In this implementation, the flush parameter of inflate() only affects the
+   return code (per zlib.h).  inflate() always writes as much as possible to
+   strm->next_out, given the space available and the provided input--the effect
+   documented in zlib.h of Z_SYNC_FLUSH.  Furthermore, inflate() always defers
+   the allocation of and copying into a sliding window until necessary, which
+   provides the effect documented in zlib.h for Z_FINISH when the entire input
+   stream is available.  So the only thing the flush parameter actually does is:
+   when flush is set to Z_FINISH, inflate() cannot return Z_OK.  Instead it
+   will return Z_BUF_ERROR if it has not reached the end of the stream.
+ */
+
+int ZEXPORT inflate(strm, flush)
+z_streamp strm;
+int flush;
+{
+    struct inflate_state FAR *state;
+    z_const unsigned char FAR *next;    /* next input */
+    unsigned char FAR *put;     /* next output */
+    unsigned have, left;        /* available input and output */
+    unsigned long hold;         /* bit buffer */
+    unsigned bits;              /* bits in bit buffer */
+    unsigned in, out;           /* save starting available input and output */
+    unsigned copy;              /* number of stored or match bytes to copy */
+    unsigned char FAR *from;    /* where to copy match bytes from */
+    code here;                  /* current decoding table entry */
+    code last;                  /* parent table entry */
+    unsigned len;               /* length to copy for repeats, bits to drop */
+    int ret;                    /* return code */
+#ifdef GUNZIP
+    unsigned char hbuf[4];      /* buffer for gzip header crc calculation */
+#endif
+    static const unsigned short order[19] = /* permutation of code lengths */
+        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+    if (inflateStateCheck(strm) || strm->next_out == Z_NULL ||
+        (strm->next_in == Z_NULL && strm->avail_in != 0))
+        return Z_STREAM_ERROR;
+
+    state = (struct inflate_state FAR *)strm->state;
+    if (state->mode == TYPE) state->mode = TYPEDO;      /* skip check */
+    LOAD();
+    in = have;
+    out = left;
+    ret = Z_OK;
+    for (;;)
+        switch (state->mode) {
+        case HEAD:
+            if (state->wrap == 0) {
+                state->mode = TYPEDO;
+                break;
+            }
+            NEEDBITS(16);
+#ifdef GUNZIP
+            if ((state->wrap & 2) && hold == 0x8b1f) {  /* gzip header */
+                if (state->wbits == 0)
+                    state->wbits = 15;
+                state->check = crc32(0L, Z_NULL, 0);
+                CRC2(state->check, hold);
+                INITBITS();
+                state->mode = FLAGS;
+                break;
+            }
+            state->flags = 0;           /* expect zlib header */
+            if (state->head != Z_NULL)
+                state->head->done = -1;
+            if (!(state->wrap & 1) ||   /* check if zlib header allowed */
+#else
+            if (
+#endif
+                ((BITS(8) << 8) + (hold >> 8)) % 31) {
+                strm->msg = (char *)"incorrect header check";
+                state->mode = BAD;
+                break;
+            }
+            if (BITS(4) != Z_DEFLATED) {
+                strm->msg = (char *)"unknown compression method";
+                state->mode = BAD;
+                break;
+            }
+            DROPBITS(4);
+            len = BITS(4) + 8;
+            if (state->wbits == 0)
+                state->wbits = len;
+            if (len > 15 || len > state->wbits) {
+                strm->msg = (char *)"invalid window size";
+                state->mode = BAD;
+                break;
+            }
+            state->dmax = 1U << len;
+            Tracev((stderr, "inflate:   zlib header ok\n"));
+            strm->adler = state->check = adler32(0L, Z_NULL, 0);
+            state->mode = hold & 0x200 ? DICTID : TYPE;
+            INITBITS();
+            break;
+#ifdef GUNZIP
+        case FLAGS:
+            NEEDBITS(16);
+            state->flags = (int)(hold);
+            if ((state->flags & 0xff) != Z_DEFLATED) {
+                strm->msg = (char *)"unknown compression method";
+                state->mode = BAD;
+                break;
+            }
+            if (state->flags & 0xe000) {
+                strm->msg = (char *)"unknown header flags set";
+                state->mode = BAD;
+                break;
+            }
+            if (state->head != Z_NULL)
+                state->head->text = (int)((hold >> 8) & 1);
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC2(state->check, hold);
+            INITBITS();
+            state->mode = TIME;
+        case TIME:
+            NEEDBITS(32);
+            if (state->head != Z_NULL)
+                state->head->time = hold;
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC4(state->check, hold);
+            INITBITS();
+            state->mode = OS;
+        case OS:
+            NEEDBITS(16);
+            if (state->head != Z_NULL) {
+                state->head->xflags = (int)(hold & 0xff);
+                state->head->os = (int)(hold >> 8);
+            }
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC2(state->check, hold);
+            INITBITS();
+            state->mode = EXLEN;
+        case EXLEN:
+            if (state->flags & 0x0400) {
+                NEEDBITS(16);
+                state->length = (unsigned)(hold);
+                if (state->head != Z_NULL)
+                    state->head->extra_len = (unsigned)hold;
+                if ((state->flags & 0x0200) && (state->wrap & 4))
+                    CRC2(state->check, hold);
+                INITBITS();
+            }
+            else if (state->head != Z_NULL)
+                state->head->extra = Z_NULL;
+            state->mode = EXTRA;
+        case EXTRA:
+            if (state->flags & 0x0400) {
+                copy = state->length;
+                if (copy > have) copy = have;
+                if (copy) {
+                    if (state->head != Z_NULL &&
+                        state->head->extra != Z_NULL) {
+                        len = state->head->extra_len - state->length;
+                        zmemcpy(state->head->extra + len, next,
+                                len + copy > state->head->extra_max ?
+                                state->head->extra_max - len : copy);
+                    }
+                    if ((state->flags & 0x0200) && (state->wrap & 4))
+                        state->check = crc32(state->check, next, copy);
+                    have -= copy;
+                    next += copy;
+                    state->length -= copy;
+                }
+                if (state->length) goto inf_leave;
+            }
+            state->length = 0;
+            state->mode = NAME;
+        case NAME:
+            if (state->flags & 0x0800) {
+                if (have == 0) goto inf_leave;
+                copy = 0;
+                do {
+                    len = (unsigned)(next[copy++]);
+                    if (state->head != Z_NULL &&
+                            state->head->name != Z_NULL &&
+                            state->length < state->head->name_max)
+                        state->head->name[state->length++] = (Bytef)len;
+                } while (len && copy < have);
+                if ((state->flags & 0x0200) && (state->wrap & 4))
+                    state->check = crc32(state->check, next, copy);
+                have -= copy;
+                next += copy;
+                if (len) goto inf_leave;
+            }
+            else if (state->head != Z_NULL)
+                state->head->name = Z_NULL;
+            state->length = 0;
+            state->mode = COMMENT;
+        case COMMENT:
+            if (state->flags & 0x1000) {
+                if (have == 0) goto inf_leave;
+                copy = 0;
+                do {
+                    len = (unsigned)(next[copy++]);
+                    if (state->head != Z_NULL &&
+                            state->head->comment != Z_NULL &&
+                            state->length < state->head->comm_max)
+                        state->head->comment[state->length++] = (Bytef)len;
+                } while (len && copy < have);
+                if ((state->flags & 0x0200) && (state->wrap & 4))
+                    state->check = crc32(state->check, next, copy);
+                have -= copy;
+                next += copy;
+                if (len) goto inf_leave;
+            }
+            else if (state->head != Z_NULL)
+                state->head->comment = Z_NULL;
+            state->mode = HCRC;
+        case HCRC:
+            if (state->flags & 0x0200) {
+                NEEDBITS(16);
+                if ((state->wrap & 4) && hold != (state->check & 0xffff)) {
+                    strm->msg = (char *)"header crc mismatch";
+                    state->mode = BAD;
+                    break;
+                }
+                INITBITS();
+            }
+            if (state->head != Z_NULL) {
+                state->head->hcrc = (int)((state->flags >> 9) & 1);
+                state->head->done = 1;
+            }
+            strm->adler = state->check = crc32(0L, Z_NULL, 0);
+            state->mode = TYPE;
+            break;
+#endif
+        case DICTID:
+            NEEDBITS(32);
+            strm->adler = state->check = ZSWAP32(hold);
+            INITBITS();
+            state->mode = DICT;
+        case DICT:
+            if (state->havedict == 0) {
+                RESTORE();
+                return Z_NEED_DICT;
+            }
+            strm->adler = state->check = adler32(0L, Z_NULL, 0);
+            state->mode = TYPE;
+        case TYPE:
+            if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave;
+        case TYPEDO:
+            if (state->last) {
+                BYTEBITS();
+                state->mode = CHECK;
+                break;
+            }
+            NEEDBITS(3);
+            state->last = BITS(1);
+            DROPBITS(1);
+            switch (BITS(2)) {
+            case 0:                             /* stored block */
+                Tracev((stderr, "inflate:     stored block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = STORED;
+                break;
+            case 1:                             /* fixed block */
+                fixedtables(state);
+                Tracev((stderr, "inflate:     fixed codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = LEN_;             /* decode codes */
+                if (flush == Z_TREES) {
+                    DROPBITS(2);
+                    goto inf_leave;
+                }
+                break;
+            case 2:                             /* dynamic block */
+                Tracev((stderr, "inflate:     dynamic codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = TABLE;
+                break;
+            case 3:
+                strm->msg = (char *)"invalid block type";
+                state->mode = BAD;
+            }
+            DROPBITS(2);
+            break;
+        case STORED:
+            BYTEBITS();                         /* go to byte boundary */
+            NEEDBITS(32);
+            if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
+                strm->msg = (char *)"invalid stored block lengths";
+                state->mode = BAD;
+                break;
+            }
+            state->length = (unsigned)hold & 0xffff;
+            Tracev((stderr, "inflate:       stored length %u\n",
+                    state->length));
+            INITBITS();
+            state->mode = COPY_;
+            if (flush == Z_TREES) goto inf_leave;
+        case COPY_:
+            state->mode = COPY;
+        case COPY:
+            copy = state->length;
+            if (copy) {
+                if (copy > have) copy = have;
+                if (copy > left) copy = left;
+                if (copy == 0) goto inf_leave;
+                zmemcpy(put, next, copy);
+                have -= copy;
+                next += copy;
+                left -= copy;
+                put += copy;
+                state->length -= copy;
+                break;
+            }
+            Tracev((stderr, "inflate:       stored end\n"));
+            state->mode = TYPE;
+            break;
+        case TABLE:
+            NEEDBITS(14);
+            state->nlen = BITS(5) + 257;
+            DROPBITS(5);
+            state->ndist = BITS(5) + 1;
+            DROPBITS(5);
+            state->ncode = BITS(4) + 4;
+            DROPBITS(4);
+#ifndef PKZIP_BUG_WORKAROUND
+            if (state->nlen > 286 || state->ndist > 30) {
+                strm->msg = (char *)"too many length or distance symbols";
+                state->mode = BAD;
+                break;
+            }
+#endif
+            Tracev((stderr, "inflate:       table sizes ok\n"));
+            state->have = 0;
+            state->mode = LENLENS;
+        case LENLENS:
+            while (state->have < state->ncode) {
+                NEEDBITS(3);
+                state->lens[order[state->have++]] = (unsigned short)BITS(3);
+                DROPBITS(3);
+            }
+            while (state->have < 19)
+                state->lens[order[state->have++]] = 0;
+            state->next = state->codes;
+            state->lencode = (const code FAR *)(state->next);
+            state->lenbits = 7;
+            ret = inflate_table(CODES, state->lens, 19, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid code lengths set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       code lengths ok\n"));
+            state->have = 0;
+            state->mode = CODELENS;
+        case CODELENS:
+            while (state->have < state->nlen + state->ndist) {
+                for (;;) {
+                    here = state->lencode[BITS(state->lenbits)];
+                    if ((unsigned)(here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                if (here.val < 16) {
+                    DROPBITS(here.bits);
+                    state->lens[state->have++] = here.val;
+                }
+                else {
+                    if (here.val == 16) {
+                        NEEDBITS(here.bits + 2);
+                        DROPBITS(here.bits);
+                        if (state->have == 0) {
+                            strm->msg = (char *)"invalid bit length repeat";
+                            state->mode = BAD;
+                            break;
+                        }
+                        len = state->lens[state->have - 1];
+                        copy = 3 + BITS(2);
+                        DROPBITS(2);
+                    }
+                    else if (here.val == 17) {
+                        NEEDBITS(here.bits + 3);
+                        DROPBITS(here.bits);
+                        len = 0;
+                        copy = 3 + BITS(3);
+                        DROPBITS(3);
+                    }
+                    else {
+                        NEEDBITS(here.bits + 7);
+                        DROPBITS(here.bits);
+                        len = 0;
+                        copy = 11 + BITS(7);
+                        DROPBITS(7);
+                    }
+                    if (state->have + copy > state->nlen + state->ndist) {
+                        strm->msg = (char *)"invalid bit length repeat";
+                        state->mode = BAD;
+                        break;
+                    }
+                    while (copy--)
+                        state->lens[state->have++] = (unsigned short)len;
+                }
+            }
+
+            /* handle error breaks in while */
+            if (state->mode == BAD) break;
+
+            /* check for end-of-block code (better have one) */
+            if (state->lens[256] == 0) {
+                strm->msg = (char *)"invalid code -- missing end-of-block";
+                state->mode = BAD;
+                break;
+            }
+
+            /* build code tables -- note: do not change the lenbits or distbits
+               values here (9 and 6) without reading the comments in inftrees.h
+               concerning the ENOUGH constants, which depend on those values */
+            state->next = state->codes;
+            state->lencode = (const code FAR *)(state->next);
+            state->lenbits = 9;
+            ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid literal/lengths set";
+                state->mode = BAD;
+                break;
+            }
+            state->distcode = (const code FAR *)(state->next);
+            state->distbits = 6;
+            ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
+                            &(state->next), &(state->distbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid distances set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       codes ok\n"));
+            state->mode = LEN_;
+            if (flush == Z_TREES) goto inf_leave;
+        case LEN_:
+            state->mode = LEN;
+        case LEN:
+            if (have >= 6 && left >= 258) {
+                RESTORE();
+                inflate_fast(strm, out);
+                LOAD();
+                if (state->mode == TYPE)
+                    state->back = -1;
+                break;
+            }
+            state->back = 0;
+            for (;;) {
+                here = state->lencode[BITS(state->lenbits)];
+                if ((unsigned)(here.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if (here.op && (here.op & 0xf0) == 0) {
+                last = here;
+                for (;;) {
+                    here = state->lencode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+                state->back += last.bits;
+            }
+            DROPBITS(here.bits);
+            state->back += here.bits;
+            state->length = (unsigned)here.val;
+            if ((int)(here.op) == 0) {
+                Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
+                        "inflate:         literal '%c'\n" :
+                        "inflate:         literal 0x%02x\n", here.val));
+                state->mode = LIT;
+                break;
+            }
+            if (here.op & 32) {
+                Tracevv((stderr, "inflate:         end of block\n"));
+                state->back = -1;
+                state->mode = TYPE;
+                break;
+            }
+            if (here.op & 64) {
+                strm->msg = (char *)"invalid literal/length code";
+                state->mode = BAD;
+                break;
+            }
+            state->extra = (unsigned)(here.op) & 15;
+            state->mode = LENEXT;
+        case LENEXT:
+            if (state->extra) {
+                NEEDBITS(state->extra);
+                state->length += BITS(state->extra);
+                DROPBITS(state->extra);
+                state->back += state->extra;
+            }
+            Tracevv((stderr, "inflate:         length %u\n", state->length));
+            state->was = state->length;
+            state->mode = DIST;
+        case DIST:
+            for (;;) {
+                here = state->distcode[BITS(state->distbits)];
+                if ((unsigned)(here.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if ((here.op & 0xf0) == 0) {
+                last = here;
+                for (;;) {
+                    here = state->distcode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+                state->back += last.bits;
+            }
+            DROPBITS(here.bits);
+            state->back += here.bits;
+            if (here.op & 64) {
+                strm->msg = (char *)"invalid distance code";
+                state->mode = BAD;
+                break;
+            }
+            state->offset = (unsigned)here.val;
+            state->extra = (unsigned)(here.op) & 15;
+            state->mode = DISTEXT;
+        case DISTEXT:
+            if (state->extra) {
+                NEEDBITS(state->extra);
+                state->offset += BITS(state->extra);
+                DROPBITS(state->extra);
+                state->back += state->extra;
+            }
+#ifdef INFLATE_STRICT
+            if (state->offset > state->dmax) {
+                strm->msg = (char *)"invalid distance too far back";
+                state->mode = BAD;
+                break;
+            }
+#endif
+            Tracevv((stderr, "inflate:         distance %u\n", state->offset));
+            state->mode = MATCH;
+        case MATCH:
+            if (left == 0) goto inf_leave;
+            copy = out - left;
+            if (state->offset > copy) {         /* copy from window */
+                copy = state->offset - copy;
+                if (copy > state->whave) {
+                    if (state->sane) {
+                        strm->msg = (char *)"invalid distance too far back";
+                        state->mode = BAD;
+                        break;
+                    }
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+                    Trace((stderr, "inflate.c too far\n"));
+                    copy -= state->whave;
+                    if (copy > state->length) copy = state->length;
+                    if (copy > left) copy = left;
+                    left -= copy;
+                    state->length -= copy;
+                    do {
+                        *put++ = 0;
+                    } while (--copy);
+                    if (state->length == 0) state->mode = LEN;
+                    break;
+#endif
+                }
+                if (copy > state->wnext) {
+                    copy -= state->wnext;
+                    from = state->window + (state->wsize - copy);
+                }
+                else
+                    from = state->window + (state->wnext - copy);
+                if (copy > state->length) copy = state->length;
+            }
+            else {                              /* copy from output */
+                from = put - state->offset;
+                copy = state->length;
+            }
+            if (copy > left) copy = left;
+            left -= copy;
+            state->length -= copy;
+            do {
+                *put++ = *from++;
+            } while (--copy);
+            if (state->length == 0) state->mode = LEN;
+            break;
+        case LIT:
+            if (left == 0) goto inf_leave;
+            *put++ = (unsigned char)(state->length);
+            left--;
+            state->mode = LEN;
+            break;
+        case CHECK:
+            if (state->wrap) {
+                NEEDBITS(32);
+                out -= left;
+                strm->total_out += out;
+                state->total += out;
+                if ((state->wrap & 4) && out)
+                    strm->adler = state->check =
+                        UPDATE(state->check, put - out, out);
+                out = left;
+                if ((state->wrap & 4) && (
+#ifdef GUNZIP
+                     state->flags ? hold :
+#endif
+                     ZSWAP32(hold)) != state->check) {
+                    strm->msg = (char *)"incorrect data check";
+                    state->mode = BAD;
+                    break;
+                }
+                INITBITS();
+                Tracev((stderr, "inflate:   check matches trailer\n"));
+            }
+#ifdef GUNZIP
+            state->mode = LENGTH;
+        case LENGTH:
+            if (state->wrap && state->flags) {
+                NEEDBITS(32);
+                if (hold != (state->total & 0xffffffffUL)) {
+                    strm->msg = (char *)"incorrect length check";
+                    state->mode = BAD;
+                    break;
+                }
+                INITBITS();
+                Tracev((stderr, "inflate:   length matches trailer\n"));
+            }
+#endif
+            state->mode = DONE;
+        case DONE:
+            ret = Z_STREAM_END;
+            goto inf_leave;
+        case BAD:
+            ret = Z_DATA_ERROR;
+            goto inf_leave;
+        case MEM:
+            return Z_MEM_ERROR;
+        case SYNC:
+        default:
+            return Z_STREAM_ERROR;
+        }
+
+    /*
+       Return from inflate(), updating the total counts and the check value.
+       If there was no progress during the inflate() call, return a buffer
+       error.  Call updatewindow() to create and/or update the window state.
+       Note: a memory error from inflate() is non-recoverable.
+     */
+  inf_leave:
+    RESTORE();
+    if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
+            (state->mode < CHECK || flush != Z_FINISH)))
+        if (updatewindow(strm, strm->next_out, out - strm->avail_out)) {
+            state->mode = MEM;
+            return Z_MEM_ERROR;
+        }
+    in -= strm->avail_in;
+    out -= strm->avail_out;
+    strm->total_in += in;
+    strm->total_out += out;
+    state->total += out;
+    if ((state->wrap & 4) && out)
+        strm->adler = state->check =
+            UPDATE(state->check, strm->next_out - out, out);
+    strm->data_type = (int)state->bits + (state->last ? 64 : 0) +
+                      (state->mode == TYPE ? 128 : 0) +
+                      (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0);
+    if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
+        ret = Z_BUF_ERROR;
+    return ret;
+}
+
+int ZEXPORT inflateEnd(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *st;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+    /* release the window first (if one exists), then the state itself */
+    if (st->window != Z_NULL) ZFREE(strm, st->window);
+    ZFREE(strm, strm->state);
+    strm->state = Z_NULL;
+    Tracev((stderr, "inflate: end\n"));
+    return Z_OK;
+}
+
+int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength)
+z_streamp strm;
+Bytef *dictionary;
+uInt *dictLength;
+{
+    struct inflate_state FAR *st;
+
+    /* a stream without a valid inflate state cannot be queried */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+
+    /* linearize the window: the whave - wnext bytes starting at wnext come
+       first, followed by the wnext bytes at the front of the window */
+    if (dictionary != Z_NULL && st->whave != 0) {
+        zmemcpy(dictionary, st->window + st->wnext, st->whave - st->wnext);
+        zmemcpy(dictionary + st->whave - st->wnext, st->window, st->wnext);
+    }
+
+    /* the length is stored whether or not the bytes were requested */
+    if (dictLength != Z_NULL) *dictLength = st->whave;
+    return Z_OK;
+}
+
+int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength)
+z_streamp strm;
+const Bytef *dictionary;
+uInt dictLength;
+{
+    struct inflate_state FAR *st;
+    unsigned long id;
+
+    /* a valid state is required, and a stream with a wrapper takes a
+       dictionary only while inflate() is parked in DICT mode */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+    if (st->mode != DICT) {
+        if (st->wrap != 0)
+            return Z_STREAM_ERROR;
+    }
+    else {
+        /* the Adler-32 of the supplied bytes must match the stored check */
+        id = adler32(adler32(0L, Z_NULL, 0), dictionary, dictLength);
+        if (id != st->check)
+            return Z_DATA_ERROR;
+    }
+
+    /* load the bytes through updatewindow(), which is handed a pointer just
+       past the data and amends the existing dictionary if appropriate */
+    if (updatewindow(strm, dictionary + dictLength, dictLength)) {
+        st->mode = MEM;
+        return Z_MEM_ERROR;
+    }
+
+    st->havedict = 1;
+    Tracev((stderr, "inflate:   dictionary set\n"));
+    return Z_OK;
+}
+
+int ZEXPORT inflateGetHeader(strm, head)
+z_streamp strm;
+gz_headerp head;
+{
+    struct inflate_state FAR *st;
+
+    /* requires a valid stream with gzip decoding enabled (wrap & 2) */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+    if (!(st->wrap & 2))
+        return Z_STREAM_ERROR;
+    /* register the caller's structure and mark it as not yet filled in */
+    st->head = head;
+    head->done = 0;
+    return Z_OK;
+}
+
+/*
+   Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff.  Return when found
+   or when out of input.  When called, *have is the number of pattern bytes
+   found in order so far, in 0..3.  On return *have is updated to the new
+   state.  If on return *have equals four, then the pattern was found and the
+   return value is how many bytes were read including the last byte of the
+   pattern.  If *have is less than four, then the pattern has not been found
+   yet and the return value is len.  In the latter case, syncsearch() can be
+   called again with more data and the *have state.  *have is initialized to
+   zero for the first call.
+ */
+local unsigned syncsearch(have, buf, len)
+unsigned FAR *have;
+const unsigned char FAR *buf;
+unsigned len;
+{
+    unsigned seen;      /* pattern bytes matched so far, 0..4 */
+    unsigned pos;       /* index of the next byte of buf to examine */
+
+    seen = *have;
+    for (pos = 0; pos < len && seen < 4; pos++) {
+        if ((int)(buf[pos]) == (seen < 2 ? 0 : 0xff))
+            seen++;             /* this byte extends the match */
+        else if (buf[pos] == 0)
+            /* zero where 0xff was wanted: 0,0,0 still ends in two zeros
+               (stay at 2); 0,0,0xff,0 ends in just one (drop to 1) */
+            seen = 4 - seen;
+        else
+            seen = 0;           /* nonzero mismatch: start over */
+    }
+    *have = seen;
+    return pos;
+}
+
+int ZEXPORT inflateSync(strm)
+z_streamp strm;
+{
+    /* Skip input through the next 0, 0, 0xff, 0xff pattern.  Z_OK: found, set
+       to restart at a new block; Z_DATA_ERROR: not found yet; Z_BUF_ERROR:
+       nothing to search; Z_STREAM_ERROR: invalid stream state. */
+    unsigned len;               /* number of bytes to look at or looked at */
+    unsigned long in, out;      /* temporary to save total_in and total_out */
+    unsigned char buf[4];       /* to restore bit buffer to byte string */
+    struct inflate_state FAR *state;
+
+    /* check parameters */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
+    /* if first time, start search in bit buffer */
+    if (state->mode != SYNC) {
+        state->mode = SYNC;
+        state->hold >>= state->bits & 7;    /* drop partial byte (low bits) */
+        state->bits -= state->bits & 7;
+        len = 0;
+        while (state->bits >= 8) {
+            buf[len++] = (unsigned char)(state->hold);
+            state->hold >>= 8;
+            state->bits -= 8;
+        }
+        state->have = 0;
+        syncsearch(&(state->have), buf, len);
+    }
+    /* search available input */
+    len = syncsearch(&(state->have), strm->next_in, strm->avail_in);
+    strm->avail_in -= len;
+    strm->next_in += len;
+    strm->total_in += len;
+    /* return no joy or set up to restart inflate() on a new block */
+    if (state->have != 4) return Z_DATA_ERROR;
+    in = strm->total_in;  out = strm->total_out;
+    inflateReset(strm);
+    strm->total_in = in;  strm->total_out = out;
+    state->mode = TYPE;
+    return Z_OK;
+}
+
+/*
+   Returns true if inflate is currently at the end of a block generated by
+   Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP
+   implementation to provide an additional safety check. PPP uses
+   Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored
+   block. When decompressing, PPP checks that at the end of an input packet,
+   inflate is waiting for these length bytes.
+ */
+int ZEXPORT inflateSyncPoint(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *st;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+    /* true only when waiting for stored-block lengths with no bits held */
+    return (st->bits == 0 && st->mode == STORED) ? 1 : 0;
+}
+
+int ZEXPORT inflateCopy(dest, source)
+z_streamp dest;
+z_streamp source;
+{
+    struct inflate_state FAR *from;     /* state being duplicated */
+    struct inflate_state FAR *twin;     /* newly allocated duplicate */
+    unsigned char FAR *win = Z_NULL;    /* duplicate window, if any */
+    unsigned wsize = 0;                 /* window size in bytes */
+
+    /* check input */
+    if (inflateStateCheck(source) || dest == Z_NULL) return Z_STREAM_ERROR;
+    from = (struct inflate_state FAR *)source->state;
+
+    /* allocate everything first, so a failure leaves dest unmodified */
+    twin = (struct inflate_state FAR *)
+           ZALLOC(source, 1, sizeof(struct inflate_state));
+    if (twin == Z_NULL) return Z_MEM_ERROR;
+    if (from->window != Z_NULL) {
+        wsize = 1U << from->wbits;
+        win = (unsigned char FAR *)
+              ZALLOC(source, wsize, sizeof(unsigned char));
+        if (win == Z_NULL) {
+            ZFREE(source, twin);
+            return Z_MEM_ERROR;
+        }
+    }
+
+    /* duplicate the stream structure and the state byte for byte */
+    zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
+    zmemcpy((voidpf)twin, (voidpf)from, sizeof(struct inflate_state));
+    twin->strm = dest;
+
+    /* pointers into the source's codes[] are rebased onto the copy's codes[];
+       lencode/distcode that point elsewhere keep their copied values */
+    if (from->lencode >= from->codes &&
+        from->lencode <= from->codes + ENOUGH - 1) {
+        twin->lencode = twin->codes + (from->lencode - from->codes);
+        twin->distcode = twin->codes + (from->distcode - from->codes);
+    }
+    twin->next = twin->codes + (from->next - from->codes);
+    if (win != Z_NULL)
+        zmemcpy(win, from->window, wsize);
+    twin->window = win;
+    dest->state = (struct internal_state FAR *)twin;
+    return Z_OK;
+}
+
+int ZEXPORT inflateUndermine(strm, subvert)
+z_streamp strm;
+int subvert;
+{
+    struct inflate_state FAR *st;
+    /* controls the "invalid distance too far back" check made by inflate() */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+#ifndef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+    (void)subvert;              /* unsupported build: check stays enabled */
+    st->sane = 1;
+    return Z_DATA_ERROR;
+#else
+    st->sane = subvert ? 0 : 1; /* nonzero subvert disables the check */
+    return Z_OK;
+#endif
+}
+
+int ZEXPORT inflateValidate(strm, check)
+z_streamp strm;
+int check;
+{
+    struct inflate_state FAR *st;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+
+    /* wrap bit value 4 gates check-value computation and comparison in
+       inflate(); set it for nonzero check, clear it otherwise */
+    st->wrap = check ? (st->wrap | 4) : (st->wrap & ~4);
+    return Z_OK;
+}
+
+long ZEXPORT inflateMark(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+    /* back << 16, plus bytes left (COPY) or bytes already copied (MATCH) */
+    if (inflateStateCheck(strm))
+        return -(1L << 16);     /* invalid stream state */
+    state = (struct inflate_state FAR *)strm->state;
+    return (long)(((unsigned long)((long)state->back)) << 16) +
+        (state->mode == COPY ? state->length :
+            (state->mode == MATCH ? state->was - state->length : 0));
+}
+
+unsigned long ZEXPORT inflateCodesUsed(strm)
+z_streamp strm;
+{
+    /* distance, in table entries, from the start of codes[] to state->next;
+       (unsigned long)-1 when the stream state is invalid */
+    if (inflateStateCheck(strm)) return (unsigned long)-1;
+    return (unsigned long)(((struct inflate_state FAR *)strm->state)->next -
+                           ((struct inflate_state FAR *)strm->state)->codes);
+}
diff --git a/libraries/zlib/inflate.h b/libraries/zlib/inflate.h
new file mode 100644
index 000000000..a46cce6b6
--- /dev/null
+++ b/libraries/zlib/inflate.h
@@ -0,0 +1,125 @@
+/* inflate.h -- internal inflate state definition
+ * Copyright (C) 1995-2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* define NO_GZIP when compiling if you want to disable gzip header and
+   trailer decoding by inflate().  NO_GZIP would be used to avoid linking in
+   the crc code when it is not needed.  For shared libraries, gzip decoding
+   should be left enabled. */
+#ifndef NO_GZIP
+#  define GUNZIP
+#endif
+
+/* Possible inflate modes between inflate() calls (i: input, o: output) */
+typedef enum {
+    HEAD = 16180,   /* i: waiting for magic header */
+    FLAGS,      /* i: waiting for method and flags (gzip) */
+    TIME,       /* i: waiting for modification time (gzip) */
+    OS,         /* i: waiting for extra flags and operating system (gzip) */
+    EXLEN,      /* i: waiting for extra length (gzip) */
+    EXTRA,      /* i: waiting for extra bytes (gzip) */
+    NAME,       /* i: waiting for end of file name (gzip) */
+    COMMENT,    /* i: waiting for end of comment (gzip) */
+    HCRC,       /* i: waiting for header crc (gzip) */
+    DICTID,     /* i: waiting for dictionary check value */
+    DICT,       /* waiting for inflateSetDictionary() call */
+        TYPE,       /* i: waiting for type bits, including last-flag bit */
+        TYPEDO,     /* i: same, but skip check to exit inflate on new block */
+        STORED,     /* i: waiting for stored size (length and complement) */
+        COPY_,      /* i/o: same as COPY below, but only first time in */
+        COPY,       /* i/o: waiting for input or output to copy stored block */
+        TABLE,      /* i: waiting for dynamic block table lengths */
+        LENLENS,    /* i: waiting for code length code lengths */
+        CODELENS,   /* i: waiting for length/lit and distance code lengths */
+            LEN_,       /* i: same as LEN below, but only first time in */
+            LEN,        /* i: waiting for length/lit/eob code */
+            LENEXT,     /* i: waiting for length extra bits */
+            DIST,       /* i: waiting for distance code */
+            DISTEXT,    /* i: waiting for distance extra bits */
+            MATCH,      /* o: waiting for output space to copy string */
+            LIT,        /* o: waiting for output space to write literal */
+    CHECK,      /* i: waiting for 32-bit check value */
+    LENGTH,     /* i: waiting for 32-bit length (gzip) */
+    DONE,       /* finished check, done -- remain here until reset */
+    BAD,        /* got a data error -- remain here until reset */
+    MEM,        /* got an inflate() memory error -- remain here until reset */
+    SYNC        /* looking for synchronization bytes to restart inflate() */
+} inflate_mode;
+
+/*
+    State transitions between above modes -
+
+    (most modes can go to BAD or MEM on error -- not shown for clarity)
+
+    Process header:
+        HEAD -> (gzip) or (zlib) or (raw)
+        (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT ->
+                  HCRC -> TYPE
+        (zlib) -> DICTID or TYPE
+        DICTID -> DICT -> TYPE
+        (raw) -> TYPEDO
+    Read deflate blocks:
+            TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK
+            STORED -> COPY_ -> COPY -> TYPE
+            TABLE -> LENLENS -> CODELENS -> LEN_
+            LEN_ -> LEN
+    Read deflate codes in fixed or dynamic block:
+                LEN -> LENEXT or LIT or TYPE
+                LENEXT -> DIST -> DISTEXT -> MATCH -> LEN
+                LIT -> LEN
+    Process trailer:
+        CHECK -> LENGTH -> DONE
+ */
+
+/* State maintained between inflate() calls -- approximately 7K bytes, not
+   including the allocated sliding window, which is up to 32K bytes. */
+struct inflate_state {
+    z_streamp strm;             /* pointer back to this zlib stream */
+    inflate_mode mode;          /* current inflate mode */
+    int last;                   /* true if processing last block */
+    int wrap;                   /* bit 0 true for zlib, bit 1 true for gzip,
+                                   bit 2 true to validate check value */
+    int havedict;               /* true if dictionary provided */
+    int flags;                  /* gzip header method and flags (0 if zlib) */
+    unsigned dmax;              /* zlib header max distance (INFLATE_STRICT) */
+    unsigned long check;        /* protected copy of check value */
+    unsigned long total;        /* protected copy of output count */
+    gz_headerp head;            /* where to save gzip header information */
+        /* sliding window */
+    unsigned wbits;             /* log base 2 of requested window size */
+    unsigned wsize;             /* window size or zero if not using window */
+    unsigned whave;             /* valid bytes in the window */
+    unsigned wnext;             /* window write index */
+    unsigned char FAR *window;  /* allocated sliding window, if needed */
+        /* bit accumulator */
+    unsigned long hold;         /* input bit accumulator */
+    unsigned bits;              /* number of bits in "hold" */
+        /* for string and stored block copying */
+    unsigned length;            /* literal or length of data to copy */
+    unsigned offset;            /* distance back to copy string from */
+        /* for table and code decoding */
+    unsigned extra;             /* extra bits needed */
+        /* fixed and dynamic code tables */
+    code const FAR *lencode;    /* starting table for length/literal codes */
+    code const FAR *distcode;   /* starting table for distance codes */
+    unsigned lenbits;           /* index bits for lencode */
+    unsigned distbits;          /* index bits for distcode */
+        /* dynamic table building */
+    unsigned ncode;             /* number of code length code lengths */
+    unsigned nlen;              /* number of length code lengths */
+    unsigned ndist;             /* number of distance code lengths */
+    unsigned have;              /* number of code lengths in lens[] */
+    code FAR *next;             /* next available space in codes[] */
+    unsigned short lens[320];   /* temporary storage for code lengths */
+    unsigned short work[288];   /* work area for code table building */
+    code codes[ENOUGH];         /* space for code tables */
+    int sane;                   /* if false, allow invalid distance too far */
+    int back;                   /* bits back of last unprocessed length/lit */
+    unsigned was;               /* initial length of match */
+};
diff --git a/libraries/zlib/inftrees.c b/libraries/zlib/inftrees.c
new file mode 100644
index 000000000..2ea08fc13
--- /dev/null
+++ b/libraries/zlib/inftrees.c
@@ -0,0 +1,304 @@
+/* inftrees.c -- generate Huffman trees for efficient decoding
+ * Copyright (C) 1995-2017 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+
+#define MAXBITS 15
+
+const char inflate_copyright[] =
+   " inflate 1.2.11 Copyright 1995-2017 Mark Adler ";
+/*
+  If you use the zlib library in a product, an acknowledgment is welcome
+  in the documentation of your product. If for some reason you cannot
+  include such an acknowledgment, I would appreciate that you keep this
+  copyright string in the executable of your product.
+ */
+
+/*
+   Build a set of tables to decode the provided canonical Huffman code.
+   The code lengths are lens[0..codes-1].  The result starts at *table,
+   whose indices are 0..2^bits-1.  work is a writable array of at least
+   codes shorts, which is used as a work area.  type is the type of code
+   to be generated, CODES, LENS, or DISTS.  On return, zero is success,
+   -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
+   on return points to the next available entry's address.  bits is the
+   requested root table index bits, and on return it is the actual root
+   table index bits.  It will differ if the request is greater than the
+   longest code or if it is less than the shortest code.
+ */
+int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work)
+codetype type;
+unsigned short FAR *lens;
+unsigned codes;
+code FAR * FAR *table;
+unsigned FAR *bits;
+unsigned short FAR *work;
+{
+    unsigned len;               /* a code's length in bits */
+    unsigned sym;               /* index of code symbols */
+    unsigned min, max;          /* minimum and maximum code lengths */
+    unsigned root;              /* number of index bits for root table */
+    unsigned curr;              /* number of index bits for current table */
+    unsigned drop;              /* code bits to drop for sub-table */
+    int left;                   /* number of prefix codes available */
+    unsigned used;              /* code entries in table used */
+    unsigned huff;              /* Huffman code */
+    unsigned incr;              /* for incrementing code, index */
+    unsigned fill;              /* index for replicating entries */
+    unsigned low;               /* low bits for current root entry */
+    unsigned mask;              /* mask for low root bits */
+    code here;                  /* table entry for duplication */
+    code FAR *next;             /* next available space in table */
+    const unsigned short FAR *base;     /* base value table to use */
+    const unsigned short FAR *extra;    /* extra bits table to use */
+    unsigned match;             /* use base and extra for symbol >= match */
+    unsigned short count[MAXBITS+1];    /* number of codes of each length */
+    unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
+    static const unsigned short lbase[31] = { /* Length codes 257..285 base */
+        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
+    static const unsigned short lext[31] = { /* Length codes 257..285 extra */
+        16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
+        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202};
+    static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
+        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+        8193, 12289, 16385, 24577, 0, 0};
+    static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
+        16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
+        23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
+        28, 28, 29, 29, 64, 64};
+
+    /*
+       Process a set of code lengths to create a canonical Huffman code.  The
+       code lengths are lens[0..codes-1].  Each length corresponds to the
+       symbols 0..codes-1.  The Huffman code is generated by first sorting the
+       symbols by length from short to long, and retaining the symbol order
+       for codes with equal lengths.  Then the code starts with all zero bits
+       for the first code of the shortest length, and the codes are integer
+       increments for the same length, and zeros are appended as the length
+       increases.  For the deflate format, these bits are stored backwards
+       from their more natural integer increment ordering, and so when the
+       decoding tables are built in the large loop below, the integer codes
+       are incremented backwards.
+
+       This routine assumes, but does not check, that all of the entries in
+       lens[] are in the range 0..MAXBITS.  The caller must assure this.
+       1..MAXBITS is interpreted as that code length.  zero means that that
+       symbol does not occur in this code.
+
+       The codes are sorted by computing a count of codes for each length,
+       creating from that a table of starting indices for each length in the
+       sorted table, and then entering the symbols in order in the sorted
+       table.  The sorted table is work[], with that space being provided by
+       the caller.
+
+       The length counts are used for other purposes as well, i.e. finding
+       the minimum and maximum length codes, determining if there are any
+       codes at all, checking for a valid set of lengths, and looking ahead
+       at length counts to determine sub-table sizes when building the
+       decoding tables.
+     */
+
+    /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
+    for (len = 0; len <= MAXBITS; len++)
+        count[len] = 0;
+    for (sym = 0; sym < codes; sym++)
+        count[lens[sym]]++;
+
+    /* bound code lengths, force root to be within code lengths */
+    root = *bits;
+    for (max = MAXBITS; max >= 1; max--)
+        if (count[max] != 0) break;
+    if (root > max) root = max;         /* no wider than the longest code */
+    if (max == 0) {                     /* no symbols to code at all */
+        here.op = (unsigned char)64;    /* invalid code marker */
+        here.bits = (unsigned char)1;
+        here.val = (unsigned short)0;
+        *(*table)++ = here;             /* make a table to force an error */
+        *(*table)++ = here;
+        *bits = 1;
+        return 0;     /* no symbols, but wait for decoding to report error */
+    }
+    for (min = 1; min < max; min++)
+        if (count[min] != 0) break;
+    if (root < min) root = min;         /* no narrower than the shortest */
+
+    /* check for an over-subscribed or incomplete set of lengths */
+    left = 1;
+    for (len = 1; len <= MAXBITS; len++) {
+        left <<= 1;
+        left -= count[len];
+        if (left < 0) return -1;        /* over-subscribed */
+    }
+    if (left > 0 && (type == CODES || max != 1))
+        return -1;                      /* incomplete set */
+
+    /* generate offsets into symbol table for each length for sorting */
+    offs[1] = 0;
+    for (len = 1; len < MAXBITS; len++)
+        offs[len + 1] = offs[len] + count[len];
+
+    /* sort symbols by length, by symbol order within each length */
+    for (sym = 0; sym < codes; sym++)
+        if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
+
+    /*
+       Create and fill in decoding tables.  In this loop, the table being
+       filled is at next and has curr index bits.  The code being used is huff
+       with length len.  That code is converted to an index by dropping drop
+       bits off of the bottom.  For codes where len is less than drop + curr,
+       those top drop + curr - len bits are incremented through all values to
+       fill the table with replicated entries.
+
+       root is the number of index bits for the root table.  When len exceeds
+       root, sub-tables are created pointed to by the root entry with an index
+       of the low root bits of huff.  This is saved in low to check for when a
+       new sub-table should be started.  drop is zero when the root table is
+       being filled, and drop is root when sub-tables are being filled.
+
+       When a new sub-table is needed, it is necessary to look ahead in the
+       code lengths to determine what size sub-table is needed.  The length
+       counts are used for this, and so count[] is decremented as codes are
+       entered in the tables.
+
+       used keeps track of how many table entries have been allocated from the
+       provided *table space.  It is checked for LENS and DISTS tables against
+       the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
+       the initial root table size constants.  See the comments in inftrees.h
+       for more information.
+
+       sym increments through all symbols, and the loop terminates when
+       all codes of length max, i.e. all codes, have been processed.  This
+       routine permits incomplete codes, so a check after this loop fills
+       in the remaining decoding table entry with an invalid code marker.
+     */
+
+    /* set up for code type */
+    switch (type) {
+    case CODES:
+        base = extra = work;    /* dummy value--not used */
+        match = 20;             /* symbols below 19 become plain values */
+        break;
+    case LENS:
+        base = lbase;
+        extra = lext;
+        match = 257;            /* 256 is end of block, 257.. use lbase/lext */
+        break;
+    default:    /* DISTS */
+        base = dbase;
+        extra = dext;
+        match = 0;              /* every symbol uses dbase/dext */
+    }
+
+    /* initialize state for loop */
+    huff = 0;                   /* starting code */
+    sym = 0;                    /* starting code symbol */
+    len = min;                  /* starting code length */
+    next = *table;              /* current table to fill in */
+    curr = root;                /* current table index bits */
+    drop = 0;                   /* current bits to drop from code for index */
+    low = (unsigned)(-1);       /* trigger new sub-table when len > root */
+    used = 1U << root;          /* use root table entries */
+    mask = used - 1;            /* mask for comparing low */
+
+    /* check available table space */
+    if ((type == LENS && used > ENOUGH_LENS) ||
+        (type == DISTS && used > ENOUGH_DISTS))
+        return 1;
+
+    /* process all codes and make table entries */
+    for (;;) {
+        /* create table entry */
+        here.bits = (unsigned char)(len - drop);
+        if (work[sym] + 1U < match) {
+            here.op = (unsigned char)0;
+            here.val = work[sym];
+        }
+        else if (work[sym] >= match) {
+            here.op = (unsigned char)(extra[work[sym] - match]);
+            here.val = base[work[sym] - match];
+        }
+        else {
+            here.op = (unsigned char)(32 + 64);         /* end of block */
+            here.val = 0;
+        }
+
+        /* replicate for those indices with low len bits equal to huff */
+        incr = 1U << (len - drop);
+        fill = 1U << curr;
+        min = fill;                 /* save offset to next table */
+        do {
+            fill -= incr;
+            next[(huff >> drop) + fill] = here;
+        } while (fill != 0);
+
+        /* backwards increment the len-bit code huff */
+        incr = 1U << (len - 1);
+        while (huff & incr)
+            incr >>= 1;
+        if (incr != 0) {
+            huff &= incr - 1;
+            huff += incr;
+        }
+        else
+            huff = 0;
+
+        /* go to next symbol, update count, len */
+        sym++;
+        if (--(count[len]) == 0) {
+            if (len == max) break;
+            len = lens[work[sym]];
+        }
+
+        /* create new sub-table if needed */
+        if (len > root && (huff & mask) != low) {
+            /* if first time, transition to sub-tables */
+            if (drop == 0)
+                drop = root;
+
+            /* increment past last table */
+            next += min;            /* here min is 1 << curr */
+
+            /* determine length of next table */
+            curr = len - drop;
+            left = (int)(1 << curr);
+            while (curr + drop < max) {
+                left -= count[curr + drop];
+                if (left <= 0) break;
+                curr++;
+                left <<= 1;
+            }
+
+            /* check for enough space */
+            used += 1U << curr;
+            if ((type == LENS && used > ENOUGH_LENS) ||
+                (type == DISTS && used > ENOUGH_DISTS))
+                return 1;
+
+            /* point entry in root table to sub-table */
+            low = huff & mask;
+            (*table)[low].op = (unsigned char)curr;
+            (*table)[low].bits = (unsigned char)root;
+            (*table)[low].val = (unsigned short)(next - *table);
+        }
+    }
+
+    /* fill in remaining table entry if code is incomplete (guaranteed to have
+       at most one remaining entry, since if the code is incomplete, the
+       maximum code length that was allowed to get this far is one bit) */
+    if (huff != 0) {
+        here.op = (unsigned char)64;            /* invalid code marker */
+        here.bits = (unsigned char)(len - drop);
+        here.val = (unsigned short)0;
+        next[huff] = here;
+    }
+
+    /* set return parameters */
+    *table += used;
+    *bits = root;
+    return 0;
+}
diff --git a/libraries/zlib/inftrees.h b/libraries/zlib/inftrees.h
new file mode 100644
index 000000000..baa53a0b1
--- /dev/null
+++ b/libraries/zlib/inftrees.h
@@ -0,0 +1,62 @@
+/* inftrees.h -- header to use inftrees.c
+ * Copyright (C) 1995-2005, 2010 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* Structure for decoding tables.  Each entry provides either the
+   information needed to do the operation requested by the code that
+   indexed that table entry, or it provides a pointer to another
+   table that indexes more bits of the code.  op indicates whether
+   the entry is a pointer to another table, a literal, a length or
+   distance, an end-of-block, or an invalid code.  For a table
+   pointer, the low four bits of op are the number of index bits of
+   that table.  For a length or distance, the low four bits of op
+   are the number of extra bits to get after the code.  bits is
+   the number of bits in this code or part of the code to drop off
+   of the bit buffer.  val is the actual byte to output in the case
+   of a literal, the base length or distance, or the offset from
+   the current table to the next table.  Each entry is four bytes. */
+typedef struct {
+    unsigned char op;           /* operation, extra bits, table bits */
+    unsigned char bits;         /* bits in this part of the code */
+    unsigned short val;         /* offset in table or code value */
+} code;
+
+/* op values as set by inflate_table():
+    00000000 - literal
+    0000tttt - table link, tttt != 0 is the number of table index bits
+    0001eeee - length or distance, eeee is the number of extra bits
+    01100000 - end of block
+    01000000 - invalid code
+ */
+
+/* Maximum size of the dynamic table.  The maximum number of code structures is
+   1444, which is the sum of 852 for literal/length codes and 592 for distance
+   codes.  These values were found by exhaustive searches using the program
+   examples/enough.c found in the zlib distribution.  The arguments to that
+   program are the number of symbols, the initial root table size, and the
+   maximum bit length of a code.  "enough 286 9 15" for literal/length codes
+   returns 852, and "enough 30 6 15" for distance codes returns 592.
+   The initial root table size (9 or 6) is found in the fifth argument of the
+   inflate_table() calls in inflate.c and infback.c.  If the root table size is
+   changed, then these maximum sizes would need to be recalculated and
+   updated. */
+#define ENOUGH_LENS 852
+#define ENOUGH_DISTS 592
+#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS)
+
+/* Type of code to build for inflate_table() */
+typedef enum {
+    CODES,      /* inflate_table() uses no base/extra tables for this type */
+    LENS,       /* literal/length code, bounded by ENOUGH_LENS entries */
+    DISTS       /* distance code, bounded by ENOUGH_DISTS entries */
+} codetype;
+
+int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens,
+                             unsigned codes, code FAR * FAR *table,
+                             unsigned FAR *bits, unsigned short FAR *work));
diff --git a/libraries/zlib/trees.c b/libraries/zlib/trees.c
new file mode 100644
index 000000000..50cf4b457
--- /dev/null
+++ b/libraries/zlib/trees.c
@@ -0,0 +1,1203 @@
+/* trees.c -- output deflated data using Huffman coding
+ * Copyright (C) 1995-2017 Jean-loup Gailly
+ * detect_data_type() function provided freely by Cosmin Truta, 2006
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ *  ALGORITHM
+ *
+ *      The "deflation" process uses several Huffman trees. The more
+ *      common source values are represented by shorter bit sequences.
+ *
+ *      Each code tree is stored in a compressed form which is itself
+ * a Huffman encoding of the lengths of all the code strings (in
+ * ascending order by source values).  The actual code strings are
+ * reconstructed from the lengths in the inflate process, as described
+ * in the deflate specification.
+ *
+ *  REFERENCES
+ *
+ *      Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
+ *      Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
+ *
+ *      Storer, James A.
+ *          Data Compression:  Methods and Theory, pp. 49-50.
+ *          Computer Science Press, 1988.  ISBN 0-7167-8156-5.
+ *
+ *      Sedgewick, R.
+ *          Algorithms, p290.
+ *          Addison-Wesley, 1983. ISBN 0-201-06672-6.
+ */
+
+/* @(#) $Id$ */
+
+/* #define GEN_TREES_H */
+
+#include "deflate.h"
+
+#ifdef ZLIB_DEBUG
+#  include <ctype.h>
+#endif
+
+/* ===========================================================================
+ * Constants
+ */
+
+#define MAX_BL_BITS 7
+/* Bit length codes must not exceed MAX_BL_BITS bits */
+
+#define END_BLOCK 256
+/* end of block literal code */
+
+#define REP_3_6      16
+/* repeat previous bit length 3-6 times (2 bits of repeat count) */
+
+#define REPZ_3_10    17
+/* repeat a zero length 3-10 times  (3 bits of repeat count) */
+
+#define REPZ_11_138  18
+/* repeat a zero length 11-138 times  (7 bits of repeat count) */
+
+local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
+   = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
+
+local const int extra_dbits[D_CODES] /* extra bits for each distance code */
+   = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
+   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; /* 16..18: repeat counts */
+
+local const uch bl_order[BL_CODES]
+   = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
+/* The lengths of the bit length codes are sent in order of decreasing
+ * probability, to avoid transmitting the lengths for unused bit length codes.
+ */
+
+/* ===========================================================================
+ * Local data. These are initialized only once.
+ */
+
+#define DIST_CODE_LEN  512 /* see definition of array dist_code below */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+/* non ANSI compilers may not accept trees.h */
+
+local ct_data static_ltree[L_CODES+2];
+/* The static literal tree. Since the bit lengths are imposed, there is no
+ * need for the L_CODES extra codes used during heap construction. However
+ * the codes 286 and 287 are needed to build a canonical tree (see _tr_init
+ * below).
+ */
+
+local ct_data static_dtree[D_CODES];
+/* The static distance tree. (Actually a trivial tree since all codes use
+ * 5 bits.)
+ */
+
+uch _dist_code[DIST_CODE_LEN];
+/* Distance codes. The first 256 values correspond to the normalized
+ * distances 0 .. 255, the last 256 values correspond to the top 8 bits
+ * of the 15 bit distances.
+ */
+
+uch _length_code[MAX_MATCH-MIN_MATCH+1];
+/* length code for each normalized match length (0 == MIN_MATCH) */
+
+local int base_length[LENGTH_CODES];
+/* First normalized length for each code (0 = MIN_MATCH) */
+
+local int base_dist[D_CODES];
+/* First normalized distance for each code (0 = distance of 1) */
+
+#else
+#  include "trees.h"
+#endif /* GEN_TREES_H || !STDC */
+
+struct static_tree_desc_s {
+    const ct_data *static_tree;  /* static tree or NULL */
+    const intf *extra_bits;      /* extra bits for each code or NULL */
+    int     extra_base;          /* base index for extra_bits */
+    int     elems;               /* max number of elements in the tree */
+    int     max_length;          /* max bit length for the codes */
+};
+
+local const static_tree_desc  static_l_desc =   /* literal/length codes */
+{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
+
+local const static_tree_desc  static_d_desc =   /* distance codes */
+{static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS};
+
+local const static_tree_desc  static_bl_desc =  /* bit length codes */
+{(const ct_data *)0, extra_blbits, 0,   BL_CODES, MAX_BL_BITS};
+
+/* ===========================================================================
+ * Local (static) routines in this file.
+ */
+
+local void tr_static_init OF((void));
+local void init_block     OF((deflate_state *s));
+local void pqdownheap     OF((deflate_state *s, ct_data *tree, int k));
+local void gen_bitlen     OF((deflate_state *s, tree_desc *desc));
+local void gen_codes      OF((ct_data *tree, int max_code, ushf *bl_count));
+local void build_tree     OF((deflate_state *s, tree_desc *desc));
+local void scan_tree      OF((deflate_state *s, ct_data *tree, int max_code));
+local void send_tree      OF((deflate_state *s, ct_data *tree, int max_code));
+local int  build_bl_tree  OF((deflate_state *s));
+local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
+                              int blcodes));
+local void compress_block OF((deflate_state *s, const ct_data *ltree,
+                              const ct_data *dtree));
+local int  detect_data_type OF((deflate_state *s));
+local unsigned bi_reverse OF((unsigned value, int length));
+local void bi_windup      OF((deflate_state *s));
+local void bi_flush       OF((deflate_state *s));
+
+#ifdef GEN_TREES_H
+local void gen_trees_header OF((void));
+#endif
+
+#ifndef ZLIB_DEBUG
+#  define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
+   /* Send a code of the given tree. c and tree must not have side effects */
+
+#else /* ZLIB_DEBUG */
+#  define send_code(s, c, tree) \
+     { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
+       send_bits(s, tree[c].Code, tree[c].Len); }
+#endif
+
+/* ===========================================================================
+ * Output a short LSB first on the stream (w is expanded twice).
+ * IN assertion: there is enough room in pendingBuf; w has no side effects.
+ */
+#define put_short(s, w) { \
+    put_byte(s, (uch)((w) & 0xff)); \
+    put_byte(s, (uch)((ush)(w) >> 8)); \
+}
+
+/* ===========================================================================
+ * Send a value on a given number of bits (debug build asserts 1..15).
+ * IN assertion: length <= 16 and value fits in length bits.
+ */
+#ifdef ZLIB_DEBUG
+local void send_bits      OF((deflate_state *s, int value, int length));
+
+local void send_bits(s, value, length)
+    deflate_state *s;
+    int value;  /* value to send */
+    int length; /* number of bits */
+{
+    Tracevv((stderr," l %2d v %4x ", length, value));
+    Assert(length > 0 && length <= 15, "invalid length");
+    s->bits_sent += (ulg)length;
+
+    /* If not enough room in bi_buf, use (valid) bits from bi_buf and
+     * (16 - bi_valid) bits from value, leaving (length - (16-bi_valid))
+     * unused bits in value.
+     */
+    if (s->bi_valid > (int)Buf_size - length) {
+        s->bi_buf |= (ush)value << s->bi_valid;
+        put_short(s, s->bi_buf);
+        s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
+        s->bi_valid += length - Buf_size;
+    } else {
+        s->bi_buf |= (ush)value << s->bi_valid;
+        s->bi_valid += length;
+    }
+}
+#else /* !ZLIB_DEBUG */
+
+#define send_bits(s, value, length) \
+{ int len = length;\
+  if (s->bi_valid > (int)Buf_size - len) {\
+    int val = (int)value;\
+    s->bi_buf |= (ush)val << s->bi_valid;\
+    put_short(s, s->bi_buf);\
+    s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
+    s->bi_valid += len - Buf_size;\
+  } else {\
+    s->bi_buf |= (ush)(value) << s->bi_valid;\
+    s->bi_valid += len;\
+  }\
+}
+#endif /* ZLIB_DEBUG */
+
+
+/* the arguments must not have side effects */
+
+/* ===========================================================================
+ * Build the 'constant' tables once; no-op unless GEN_TREES_H or !STDC.
+ */
+local void tr_static_init()
+{
+#if defined(GEN_TREES_H) || !defined(STDC)
+    static int static_init_done = 0; /* set to 1 once the tables are built */
+    int n;        /* iterates over tree elements */
+    int bits;     /* bit counter */
+    int length;   /* length value */
+    int code;     /* code value */
+    int dist;     /* distance index */
+    ush bl_count[MAX_BITS+1];
+    /* number of codes at each bit length for an optimal tree */
+
+    if (static_init_done) return; /* tables were built by an earlier call */
+
+    /* For some embedded targets, global variables are not initialized: */
+#ifdef NO_INIT_GLOBAL_POINTERS
+    static_l_desc.static_tree = static_ltree;
+    static_l_desc.extra_bits = extra_lbits;
+    static_d_desc.static_tree = static_dtree;
+    static_d_desc.extra_bits = extra_dbits;
+    static_bl_desc.extra_bits = extra_blbits;
+#endif
+
+    /* Initialize the mapping length (0..255) -> length code (0..28) */
+    length = 0;
+    for (code = 0; code < LENGTH_CODES-1; code++) {
+        base_length[code] = length;
+        for (n = 0; n < (1<<extra_lbits[code]); n++) {
+            _length_code[length++] = (uch)code;
+        }
+    }
+    Assert (length == 256, "tr_static_init: length != 256");
+    /* Note that the length 255 (match length 258) can be represented
+     * in two different ways: code 284 + 5 bits or code 285, so we
+     * overwrite length_code[255] to use the best encoding:
+     */
+    _length_code[length-1] = (uch)code; /* code == LENGTH_CODES-1 here */
+
+    /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
+    dist = 0;
+    for (code = 0 ; code < 16; code++) { /* first 16 codes: dist index < 256 */
+        base_dist[code] = dist;
+        for (n = 0; n < (1<<extra_dbits[code]); n++) {
+            _dist_code[dist++] = (uch)code;
+        }
+    }
+    Assert (dist == 256, "tr_static_init: dist != 256");
+    dist >>= 7; /* from now on, all distances are divided by 128 */
+    for ( ; code < D_CODES; code++) { /* remaining codes fill the upper half */
+        base_dist[code] = dist << 7;
+        for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
+            _dist_code[256 + dist++] = (uch)code;
+        }
+    }
+    Assert (dist == 256, "tr_static_init: 256+dist != 512"); /* last index 511 */
+
+    /* Construct the codes of the static literal tree */
+    for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
+    n = 0;
+    while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; /*   0..143 */
+    while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; /* 144..255 */
+    while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; /* 256..279 */
+    while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; /* 280..287 */
+    /* Codes 286 and 287 do not exist, but we must include them in the
+     * tree construction to get a canonical Huffman tree (longest code
+     * all ones)
+     */
+    gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
+
+    /* The static distance tree is trivial: every code is 5 bits long */
+    for (n = 0; n < D_CODES; n++) {
+        static_dtree[n].Len = 5;
+        static_dtree[n].Code = bi_reverse((unsigned)n, 5);
+    }
+    static_init_done = 1;
+
+#  ifdef GEN_TREES_H
+    gen_trees_header(); /* dump the freshly built tables to trees.h */
+#  endif
+#endif /* defined(GEN_TREES_H) || !defined(STDC) */
+}
+
+/* ===========================================================================
+ * Generate the file trees.h describing the static trees and lookup tables.
+ * If trees.h cannot be opened, report it with perror() and write nothing. */
+#ifdef GEN_TREES_H
+#  ifndef ZLIB_DEBUG
+#    include <stdio.h>
+#  endif
+/* Text after element i: "};" after `last`, else a comma (newline every width) */
+#  define SEPARATOR(i, last, width) \
+      ((i) == (last)? "\n};\n\n" :    \
+       ((i) % (width) == (width)-1 ? ",\n" : ", "))
+
+void gen_trees_header()
+{
+    FILE *header = fopen("trees.h", "w");
+    int i;
+
+    Assert (header != NULL, "Can't open trees.h");
+    if (header == NULL) { perror("trees.h"); return; } /* Assert may be a no-op */
+    fprintf(header, "/* header created automatically with -DGEN_TREES_H */\n\n");
+
+    fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
+    for (i = 0; i < L_CODES+2; i++) {
+        fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
+                static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
+    }
+
+    fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
+    for (i = 0; i < D_CODES; i++) {
+        fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
+                static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
+    }
+
+    fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n");
+    for (i = 0; i < DIST_CODE_LEN; i++) {
+        fprintf(header, "%2u%s", _dist_code[i],
+                SEPARATOR(i, DIST_CODE_LEN-1, 20));
+    }
+
+    fprintf(header,
+        "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
+    for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
+        fprintf(header, "%2u%s", _length_code[i],
+                SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
+    }
+
+    fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
+    for (i = 0; i < LENGTH_CODES; i++) {
+        fprintf(header, "%1u%s", base_length[i],
+                SEPARATOR(i, LENGTH_CODES-1, 20));
+    }
+
+    fprintf(header, "local const int base_dist[D_CODES] = {\n");
+    for (i = 0; i < D_CODES; i++) {
+        fprintf(header, "%5u%s", base_dist[i],
+                SEPARATOR(i, D_CODES-1, 10));
+    }
+
+    fclose(header);
+}
+#endif /* GEN_TREES_H */
+
+/* ===========================================================================
+ * Initialize the tree data structures of s for a new zlib stream.
+ */
+void ZLIB_INTERNAL _tr_init(s)
+    deflate_state *s;
+{
+    tr_static_init();
+
+    s->l_desc.dyn_tree = s->dyn_ltree;       /* literal/length tree */
+    s->l_desc.stat_desc = &static_l_desc;
+
+    s->d_desc.dyn_tree = s->dyn_dtree;       /* distance tree */
+    s->d_desc.stat_desc = &static_d_desc;
+
+    s->bl_desc.dyn_tree = s->bl_tree;        /* bit length tree */
+    s->bl_desc.stat_desc = &static_bl_desc;
+
+    s->bi_buf = 0;   /* start with an empty bit buffer */
+    s->bi_valid = 0;
+#ifdef ZLIB_DEBUG
+    s->compressed_len = 0L;
+    s->bits_sent = 0L;
+#endif
+
+    /* Initialize the first block of the first file: */
+    init_block(s);
+}
+
+/* ===========================================================================
+ * Initialize a new block: clear all frequency counts and per-block totals.
+ */
+local void init_block(s)
+    deflate_state *s;
+{
+    int n; /* iterates over tree elements */
+
+    /* Zero the frequency of every code in the three dynamic trees. */
+    for (n = 0; n < L_CODES;  n++) s->dyn_ltree[n].Freq = 0;
+    for (n = 0; n < D_CODES;  n++) s->dyn_dtree[n].Freq = 0;
+    for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
+
+    s->dyn_ltree[END_BLOCK].Freq = 1; /* END_BLOCK is always counted once */
+    s->opt_len = s->static_len = 0L;
+    s->last_lit = s->matches = 0;
+}
+
+#define SMALLEST 1
+/* Index within the heap array of least frequent node in the Huffman tree */
+
+
+/* ===========================================================================
+ * Remove the smallest element from the heap into `top`, move the last element
+ * to the root and sift it down with pqdownheap. Updates heap and heap_len.
+ */
+#define pqremove(s, tree, top) \
+{\
+    top = s->heap[SMALLEST]; \
+    s->heap[SMALLEST] = s->heap[s->heap_len--]; \
+    pqdownheap(s, tree, SMALLEST); \
+}
+
+/* ===========================================================================
+ * Compares two subtrees n and m, using the tree depth as tie breaker when
+ * the subtrees have equal frequency. This minimizes the worst case length.
+ */
+#define smaller(tree, n, m, depth) \
+   (tree[n].Freq < tree[m].Freq || \
+   (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
+
+/* ===========================================================================
+ * Restore the heap property by moving down the tree starting at node k,
+ * exchanging a node with the smallest of its two sons if necessary, stopping
+ * when the heap property is re-established (each father smaller than its
+ * two sons). Ordering is decided by smaller() on tree[] and s->depth[].
+ */
+local void pqdownheap(s, tree, k)
+    deflate_state *s;
+    ct_data *tree;  /* the tree to restore */
+    int k;               /* node to move down */
+{
+    int v = s->heap[k]; /* element being sifted down; stored once at the end */
+    int j = k << 1;  /* left son of k */
+    while (j <= s->heap_len) {
+        /* Set j to the smallest of the two sons: */
+        if (j < s->heap_len &&
+            smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
+            j++;
+        }
+        /* Exit if v is smaller than both sons */
+        if (smaller(tree, v, s->heap[j], s->depth)) break;
+
+        /* Move the smallest son up into the hole at k, then descend to j */
+        s->heap[k] = s->heap[j];  k = j;
+
+        /* And continue down the tree, setting j to the left son of k */
+        j <<= 1;
+    }
+    s->heap[k] = v;
+}
+
+/* ===========================================================================
+ * Compute the optimal bit lengths for a tree and update the total bit length
+ * for the current block. Lengths are capped at the descriptor's max_length.
+ * IN assertion: the fields freq and dad are set, heap[heap_max] and
+ *    above are the tree nodes sorted by increasing frequency.
+ * OUT assertions: the field len is set to the optimal bit length, the
+ *     array bl_count contains the frequencies for each bit length.
+ *     The length opt_len is updated; static_len is also updated if stree is
+ *     not null.
+ */
+local void gen_bitlen(s, desc)
+    deflate_state *s;
+    tree_desc *desc;    /* the tree descriptor */
+{
+    ct_data *tree        = desc->dyn_tree;
+    int max_code         = desc->max_code;
+    const ct_data *stree = desc->stat_desc->static_tree;
+    const intf *extra    = desc->stat_desc->extra_bits;
+    int base             = desc->stat_desc->extra_base;
+    int max_length       = desc->stat_desc->max_length;
+    int h;              /* heap index */
+    int n, m;           /* iterate over the tree elements */
+    int bits;           /* bit length */
+    int xbits;          /* extra bits */
+    ush f;              /* frequency */
+    int overflow = 0;   /* number of elements with bit length too large */
+
+    for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
+
+    /* In a first pass, compute the optimal bit lengths (which may
+     * overflow in the case of the bit length tree).
+     */
+    tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
+
+    for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
+        n = s->heap[h];
+        bits = tree[tree[n].Dad].Len + 1; /* one bit longer than the father */
+        if (bits > max_length) bits = max_length, overflow++;
+        tree[n].Len = (ush)bits;
+        /* We overwrite tree[n].Dad which is no longer needed */
+
+        if (n > max_code) continue; /* not a leaf node */
+
+        s->bl_count[bits]++;
+        xbits = 0;
+        if (n >= base) xbits = extra[n-base];
+        f = tree[n].Freq;
+        s->opt_len += (ulg)f * (unsigned)(bits + xbits);
+        if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits);
+    }
+    if (overflow == 0) return; /* no length was capped: nothing to repair */
+
+    Tracev((stderr,"\nbit length overflow\n"));
+    /* This happens for example on obj2 and pic of the Calgary corpus */
+
+    /* Find the first bit length which could increase: */
+    do {
+        bits = max_length-1;
+        while (s->bl_count[bits] == 0) bits--;
+        s->bl_count[bits]--;      /* move one leaf down the tree */
+        s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
+        s->bl_count[max_length]--;
+        /* The brother of the overflow item also moves one step up,
+         * but this does not affect bl_count[max_length]
+         */
+        overflow -= 2;
+    } while (overflow > 0);
+
+    /* Now recompute all bit lengths, scanning in increasing frequency.
+     * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
+     * lengths instead of fixing only the wrong ones. This idea is taken
+     * from 'ar' written by Haruhiko Okumura.)
+     */
+    for (bits = max_length; bits != 0; bits--) {
+        n = s->bl_count[bits]; /* number of leaves that get this length */
+        while (n != 0) {
+            m = s->heap[--h];
+            if (m > max_code) continue; /* internal node: does not use up n */
+            if ((unsigned) tree[m].Len != (unsigned) bits) {
+                Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
+                s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq;
+                tree[m].Len = (ush)bits;
+            }
+            n--;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Generate the codes for a given tree and bit counts (which need not be
+ * optimal). Codes are assigned in index order and stored bit-reversed.
+ * IN assertion: the array bl_count contains the bit length statistics for
+ * the given tree and the field len is set for all tree elements.
+ * OUT assertion: the field code is set for all tree elements of non
+ *     zero code length.
+ */
+local void gen_codes (tree, max_code, bl_count)
+    ct_data *tree;             /* the tree to decorate */
+    int max_code;              /* largest code with non zero frequency */
+    ushf *bl_count;            /* number of codes at each bit length */
+{
+    ush next_code[MAX_BITS+1]; /* next code value for each bit length */
+    unsigned code = 0;         /* running code value */
+    int bits;                  /* bit index */
+    int n;                     /* code index */
+
+    /* The distribution counts are first used to generate the code values
+     * without bit reversal: next_code[bits] is the first code of that length.
+     */
+    for (bits = 1; bits <= MAX_BITS; bits++) {
+        code = (code + bl_count[bits-1]) << 1;
+        next_code[bits] = (ush)code;
+    }
+    /* Check that the bit counts in bl_count are consistent. The last code
+     * must be all ones.
+     */
+    Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
+            "inconsistent bit counts");
+    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
+
+    for (n = 0;  n <= max_code; n++) {
+        int len = tree[n].Len;
+        if (len == 0) continue; /* unused code: no bit string assigned */
+        /* Now reverse the bits */
+        tree[n].Code = (ush)bi_reverse(next_code[len]++, len);
+
+        Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
+             n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
+    }
+}
+
+/* ===========================================================================
+ * Construct one Huffman tree and assigns the code bit strings and lengths.
+ * Update the total bit length for the current block.
+ * IN assertion: the field freq is set for all tree elements.
+ * OUT assertions: the fields len and code are set to the optimal bit length
+ *     and corresponding code. The length opt_len is updated; static_len is
+ *     also updated if stree is not null. The field max_code is set.
+ */
+local void build_tree(s, desc)
+    deflate_state *s;
+    tree_desc *desc; /* the tree descriptor */
+{
+    ct_data *tree         = desc->dyn_tree;
+    const ct_data *stree  = desc->stat_desc->static_tree;
+    int elems             = desc->stat_desc->elems;
+    int n, m;          /* iterate over heap elements */
+    int max_code = -1; /* largest code with non zero frequency */
+    int node;          /* new node being created */
+
+    /* Construct the initial heap, with least frequent element in
+     * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
+     * heap[0] is not used.
+     */
+    s->heap_len = 0, s->heap_max = HEAP_SIZE;
+
+    for (n = 0; n < elems; n++) {
+        if (tree[n].Freq != 0) {
+            s->heap[++(s->heap_len)] = max_code = n;
+            s->depth[n] = 0;
+        } else {
+            tree[n].Len = 0; /* unused code gets length 0 */
+        }
+    }
+
+    /* The pkzip format requires that at least one distance code exists,
+     * and that at least one bit should be sent even if there is only one
+     * possible code. So to avoid special checks later on we force at least
+     * two codes of non zero frequency.
+     */
+    while (s->heap_len < 2) {
+        node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
+        tree[node].Freq = 1;
+        s->depth[node] = 0;
+        s->opt_len--; if (stree) s->static_len -= stree[node].Len;
+        /* node is 0 or 1 so it does not have extra bits */
+    }
+    desc->max_code = max_code;
+
+    /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
+     * establish sub-heaps of increasing lengths:
+     */
+    for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
+
+    /* Construct the Huffman tree by repeatedly combining the least two
+     * frequent nodes.
+     */
+    node = elems;              /* next internal node of the tree */
+    do {
+        pqremove(s, tree, n);  /* n = node of least frequency */
+        m = s->heap[SMALLEST]; /* m = node of next least frequency */
+
+        s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
+        s->heap[--(s->heap_max)] = m;
+
+        /* Create a new node father of n and m */
+        tree[node].Freq = tree[n].Freq + tree[m].Freq;
+        s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ?
+                                s->depth[n] : s->depth[m]) + 1);
+        tree[n].Dad = tree[m].Dad = (ush)node;
+#ifdef DUMP_BL_TREE
+        if (tree == s->bl_tree) {
+            fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
+                    node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
+        }
+#endif
+        /* and insert the new node in the heap (it replaces m at the root) */
+        s->heap[SMALLEST] = node++;
+        pqdownheap(s, tree, SMALLEST);
+
+    } while (s->heap_len >= 2);
+
+    s->heap[--(s->heap_max)] = s->heap[SMALLEST]; /* last node left: the root */
+
+    /* At this point, the fields freq and dad are set. We can now
+     * generate the bit lengths.
+     */
+    gen_bitlen(s, (tree_desc *)desc);
+
+    /* The field len is now set, we can generate the bit codes */
+    gen_codes ((ct_data *)tree, max_code, s->bl_count);
+}
+
+/* ===========================================================================
+ * Scan a literal or distance tree to determine the frequencies of the codes
+ * in the bit length tree (s->bl_tree), run-length coding repeated lengths.
+ */
+local void scan_tree (s, tree, max_code)
+    deflate_state *s;
+    ct_data *tree;   /* the tree to be scanned */
+    int max_code;    /* and its largest code of non zero frequency */
+{
+    int n;                     /* iterates over all tree elements */
+    int prevlen = -1;          /* last emitted length */
+    int curlen;                /* length of current code */
+    int nextlen = tree[0].Len; /* length of next code */
+    int count = 0;             /* repeat count of the current code */
+    int max_count = 7;         /* max repeat count */
+    int min_count = 4;         /* min repeat count */
+
+    if (nextlen == 0) max_count = 138, min_count = 3;
+    tree[max_code+1].Len = (ush)0xffff; /* guard: never equals a real length */
+
+    for (n = 0; n <= max_code; n++) {
+        curlen = nextlen; nextlen = tree[n+1].Len;
+        if (++count < max_count && curlen == nextlen) {
+            continue; /* run of equal lengths still growing */
+        } else if (count < min_count) {
+            s->bl_tree[curlen].Freq += count; /* too short: count each length */
+        } else if (curlen != 0) {
+            if (curlen != prevlen) s->bl_tree[curlen].Freq++;
+            s->bl_tree[REP_3_6].Freq++;
+        } else if (count <= 10) {
+            s->bl_tree[REPZ_3_10].Freq++;
+        } else {
+            s->bl_tree[REPZ_11_138].Freq++;
+        }
+        count = 0; prevlen = curlen;
+        if (nextlen == 0) {
+            max_count = 138, min_count = 3;
+        } else if (curlen == nextlen) {
+            max_count = 6, min_count = 3;
+        } else {
+            max_count = 7, min_count = 4;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Send a literal or distance tree in compressed form, using the codes in
+ * bl_tree. Walks the lengths with the same run logic as scan_tree.
+ */
+local void send_tree (s, tree, max_code)
+    deflate_state *s;
+    ct_data *tree; /* the tree to be scanned */
+    int max_code;       /* and its largest code of non zero frequency */
+{
+    int n;                     /* iterates over all tree elements */
+    int prevlen = -1;          /* last emitted length */
+    int curlen;                /* length of current code */
+    int nextlen = tree[0].Len; /* length of next code */
+    int count = 0;             /* repeat count of the current code */
+    int max_count = 7;         /* max repeat count */
+    int min_count = 4;         /* min repeat count */
+
+    /* tree[max_code+1].Len = -1; */  /* guard already set */
+    if (nextlen == 0) max_count = 138, min_count = 3;
+
+    for (n = 0; n <= max_code; n++) {
+        curlen = nextlen; nextlen = tree[n+1].Len;
+        if (++count < max_count && curlen == nextlen) {
+            continue; /* run of equal lengths still growing */
+        } else if (count < min_count) {
+            do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
+
+        } else if (curlen != 0) {
+            if (curlen != prevlen) {
+                send_code(s, curlen, s->bl_tree); count--;
+            }
+            Assert(count >= 3 && count <= 6, " 3_6?");
+            send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
+
+        } else if (count <= 10) {
+            send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
+
+        } else {
+            send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
+        }
+        count = 0; prevlen = curlen;
+        if (nextlen == 0) {
+            max_count = 138, min_count = 3;
+        } else if (curlen == nextlen) {
+            max_count = 6, min_count = 3;
+        } else {
+            max_count = 7, min_count = 4;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Construct the Huffman tree for the bit lengths and return the index in
+ * bl_order of the last bit length code to send (never less than 3).
+ */
+local int build_bl_tree(s)
+    deflate_state *s;
+{
+    int max_blindex;  /* index of last bit length code of non zero freq */
+
+    /* Determine the bit length frequencies for literal and distance trees */
+    scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
+    scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
+
+    /* Build the bit length tree: */
+    build_tree(s, (tree_desc *)(&(s->bl_desc)));
+    /* opt_len now includes the length of the tree representations, except
+     * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
+     */
+
+    /* Determine the number of bit length codes to send. The pkzip format
+     * requires that at least 4 bit length codes be sent. (appnote.txt says
+     * 3 but the actual value used is 4.)
+     */
+    for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
+        if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
+    }
+    /* Update opt_len to include the bit length tree and counts */
+    s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4; /* 3 bits per code sent */
+    Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
+            s->opt_len, s->static_len));
+
+    return max_blindex;
+}
+
+/* ===========================================================================
+ * Send the header for a block using dynamic Huffman trees: the counts, the
+ * lengths of the bit length codes, the literal tree and the distance tree.
+ * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
+ */
+local void send_all_trees(s, lcodes, dcodes, blcodes)
+    deflate_state *s;
+    int lcodes, dcodes, blcodes; /* number of codes for each tree */
+{
+    int rank;                    /* index in bl_order */
+
+    Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
+    Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
+            "too many codes");
+    Tracev((stderr, "\nbl counts: "));
+    send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
+    send_bits(s, dcodes-1,   5);
+    send_bits(s, blcodes-4,  4); /* not -3 as stated in appnote.txt */
+    for (rank = 0; rank < blcodes; rank++) { /* 3-bit lengths, bl_order order */
+        Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
+        send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
+    }
+    Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
+
+    send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
+    Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
+
+    send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
+    Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
+}
+
+/* ===========================================================================
+ * Send a stored block: 3-bit header, byte align, length, ~length, raw bytes.
+ */
+void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last)
+    deflate_state *s;
+    charf *buf;       /* input block */
+    ulg stored_len;   /* length of input block */
+    int last;         /* one if this is the last block for a file */
+{
+    send_bits(s, (STORED_BLOCK<<1)+last, 3);    /* send block type */
+    bi_windup(s);        /* align on byte boundary */
+    put_short(s, (ush)stored_len);  /* length, truncated to 16 bits */
+    put_short(s, (ush)~stored_len); /* one's complement of the length */
+    zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len);
+    s->pending += stored_len;
+#ifdef ZLIB_DEBUG
+    s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
+    s->compressed_len += (stored_len + 4) << 3;
+    s->bits_sent += 2*16;
+    s->bits_sent += stored_len<<3;
+#endif
+}
+
+/* ===========================================================================
+ * Flush the bits in the bit buffer to pending output (leaves at most 7 bits).
+ * Thin ZLIB_INTERNAL wrapper around the file-local bi_flush().
+ */
+void ZLIB_INTERNAL _tr_flush_bits(s)
+    deflate_state *s;
+{
+    bi_flush(s);
+}
+
+/* ===========================================================================
+ * Send one empty static block to give enough lookahead for inflate.
+ * This takes 10 bits, of which 7 may remain in the bit buffer.
+ */
+void ZLIB_INTERNAL _tr_align(s)
+    deflate_state *s;
+{
+    send_bits(s, STATIC_TREES<<1, 3);      /* block type, "last" bit clear */
+    send_code(s, END_BLOCK, static_ltree); /* the block holds only EOB */
+#ifdef ZLIB_DEBUG
+    s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
+#endif
+    bi_flush(s);
+}
+
+/* ===========================================================================
+ * Pick the cheapest encoding for the current block (stored, static trees or
+ * dynamic trees), write the block out, then reset state with init_block().
+ */
+void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
+    deflate_state *s;
+    charf *buf;       /* input block, or NULL if too old */
+    ulg stored_len;   /* length of input block */
+    int last;         /* one if this is the last block for a file */
+{
+    ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
+    int max_blindex = 0;  /* index of last bit length code of non zero freq */
+
+    /* Build the Huffman trees unless a stored block is forced */
+    if (s->level > 0) {
+
+        /* Check if the file is binary or text */
+        if (s->strm->data_type == Z_UNKNOWN)
+            s->strm->data_type = detect_data_type(s);
+
+        /* Construct the literal and distance trees */
+        build_tree(s, (tree_desc *)(&(s->l_desc)));
+        Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
+                s->static_len));
+
+        build_tree(s, (tree_desc *)(&(s->d_desc)));
+        Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
+                s->static_len));
+        /* At this point, opt_len and static_len are the total bit lengths of
+         * the compressed block data, excluding the tree representations.
+         */
+
+        /* Build the bit length tree for the above two trees, and get the index
+         * in bl_order of the last bit length code to send.
+         */
+        max_blindex = build_bl_tree(s);
+
+        /* Determine the best encoding. Compute the block lengths in bytes. */
+        opt_lenb = (s->opt_len+3+7)>>3;       /* +3 block type bits, round up */
+        static_lenb = (s->static_len+3+7)>>3;
+
+        Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
+                opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
+                s->last_lit));
+
+        if (static_lenb <= opt_lenb) opt_lenb = static_lenb; /* prefer static */
+
+    } else {
+        Assert(buf != (char*)0, "lost buf");
+        opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
+    }
+
+#ifdef FORCE_STORED
+    if (buf != (char*)0) { /* force stored block */
+#else
+    if (stored_len+4 <= opt_lenb && buf != (char*)0) {
+                       /* 4: two words for the lengths */
+#endif
+        /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
+         * Otherwise we can't have processed more than WSIZE input bytes since
+         * the last block flush, because compression would have been
+         * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
+         * transform a block into a stored block.
+         */
+        _tr_stored_block(s, buf, stored_len, last);
+
+#ifdef FORCE_STATIC
+    } else if (static_lenb >= 0) { /* force static trees */
+#else
+    } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
+#endif
+        send_bits(s, (STATIC_TREES<<1)+last, 3);
+        compress_block(s, (const ct_data *)static_ltree,
+                       (const ct_data *)static_dtree);
+#ifdef ZLIB_DEBUG
+        s->compressed_len += 3 + s->static_len;
+#endif
+    } else {
+        send_bits(s, (DYN_TREES<<1)+last, 3);
+        send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
+                       max_blindex+1);
+        compress_block(s, (const ct_data *)s->dyn_ltree,
+                       (const ct_data *)s->dyn_dtree);
+#ifdef ZLIB_DEBUG
+        s->compressed_len += 3 + s->opt_len;
+#endif
+    }
+    Assert (s->compressed_len == s->bits_sent, "bad compressed size");
+    /* The above check is made mod 2^32, for files larger than 512 MB
+     * and uLong implemented on 32 bits.
+     */
+    init_block(s);
+
+    if (last) {
+        bi_windup(s); /* final block: flush the bit buffer to a byte boundary */
+#ifdef ZLIB_DEBUG
+        s->compressed_len += 7;  /* align on byte boundary */
+#endif
+    }
+    Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
+           s->compressed_len-7*last));
+}
+
+/* ===========================================================================
+ * Save the match info in d_buf/l_buf and tally the frequency counts. Return
+ * true if the current block must be flushed.
+ */
+int ZLIB_INTERNAL _tr_tally (s, dist, lc)
+    deflate_state *s;
+    unsigned dist;  /* distance of matched string */
+    unsigned lc;    /* match length-MIN_MATCH or unmatched char (if dist==0) */
+{
+    s->d_buf[s->last_lit] = (ush)dist;
+    s->l_buf[s->last_lit++] = (uch)lc;
+    if (dist == 0) {
+        /* lc is the unmatched char */
+        s->dyn_ltree[lc].Freq++;
+    } else {
+        s->matches++;
+        /* Here, lc is the match length - MIN_MATCH */
+        dist--;             /* dist = match distance - 1 */
+        Assert((ush)dist < (ush)MAX_DIST(s) &&
+               (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
+               (ush)d_code(dist) < (ush)D_CODES,  "_tr_tally: bad match");
+
+        s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; /* length code */
+        s->dyn_dtree[d_code(dist)].Freq++;                /* distance code */
+    }
+
+#ifdef TRUNCATE_BLOCK
+    /* Try to guess if it is profitable to stop the current block here */
+    if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
+        /* Compute an upper bound for the compressed length */
+        ulg out_length = (ulg)s->last_lit*8L;
+        ulg in_length = (ulg)((long)s->strstart - s->block_start);
+        int dcode;
+        for (dcode = 0; dcode < D_CODES; dcode++) {
+            out_length += (ulg)s->dyn_dtree[dcode].Freq *
+                (5L+extra_dbits[dcode]);
+        }
+        out_length >>= 3; /* bits -> bytes */
+        Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
+               s->last_lit, in_length, out_length,
+               100L - out_length*100L/in_length));
+        if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
+    }
+#endif
+    return (s->last_lit == s->lit_bufsize-1);
+    /* We avoid equality with lit_bufsize because of wraparound at 64K
+     * on 16 bit machines and because stored blocks are restricted to
+     * 64K-1 bytes.
+     */
+}
+
+/* ===========================================================================
+ * Send the block data (the s->last_lit entries of d_buf/l_buf, followed by
+ * END_BLOCK) compressed using the given Huffman trees
+ */
+local void compress_block(s, ltree, dtree)
+    deflate_state *s;
+    const ct_data *ltree; /* literal tree */
+    const ct_data *dtree; /* distance tree */
+{
+    unsigned dist;      /* distance of matched string */
+    int lc;             /* match length or unmatched char (if dist == 0) */
+    unsigned lx = 0;    /* running index in l_buf */
+    unsigned code;      /* the code to send */
+    int extra;          /* number of extra bits to send */
+
+    if (s->last_lit != 0) do {
+        dist = s->d_buf[lx];
+        lc = s->l_buf[lx++];
+        if (dist == 0) {
+            send_code(s, lc, ltree); /* send a literal byte */
+            Tracecv(isgraph(lc), (stderr," '%c' ", lc));
+        } else {
+            /* Here, lc is the match length - MIN_MATCH */
+            code = _length_code[lc];
+            send_code(s, code+LITERALS+1, ltree); /* send the length code */
+            extra = extra_lbits[code];
+            if (extra != 0) {
+                lc -= base_length[code];       /* offset within the code range */
+                send_bits(s, lc, extra);       /* send the extra length bits */
+            }
+            dist--; /* dist is now the match distance - 1 */
+            code = d_code(dist);
+            Assert (code < D_CODES, "bad d_code");
+
+            send_code(s, code, dtree);       /* send the distance code */
+            extra = extra_dbits[code];
+            if (extra != 0) {
+                dist -= (unsigned)base_dist[code]; /* offset within the range */
+                send_bits(s, dist, extra);   /* send the extra distance bits */
+            }
+        } /* literal or match pair ? */
+
+        /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
+        Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
+               "pendingBuf overflow");
+
+    } while (lx < s->last_lit);
+
+    send_code(s, END_BLOCK, ltree);
+}
+
+/* ===========================================================================
+ * Check if the data type is TEXT or BINARY, using the following algorithm:
+ * - TEXT if the two conditions below are satisfied:
+ *    a) There are no non-portable control characters belonging to the
+ *       "black list" (0..6, 14..25, 28..31).
+ *    b) There is at least one printable character belonging to the
+ *       "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
+ * - BINARY otherwise.
+ * - The following partially-portable control characters form a
+ *   "gray list" that is ignored in this detection algorithm:
+ *   (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
+ * IN assertion: the fields Freq of dyn_ltree are set.
+ */
+local int detect_data_type(s)
+    deflate_state *s;
+{
+    /* Bit i of blocked is set when byte value i is black-listed, i.e. for
+     * i in 0..6, 14..25 and 28..31:
+     * 0xf3ffc07f = binary 11110011111111111100000001111111
+     */
+    const unsigned long blocked = 0xf3ffc07fUL;
+    int sym;
+
+    /* Any black-listed control byte that occurs makes the data binary. */
+    for (sym = 0; sym < 32; sym++) {
+        if (((blocked >> sym) & 1UL) != 0 && s->dyn_ltree[sym].Freq != 0)
+            return Z_BINARY;
+    }
+    /* Otherwise a single white-listed byte (TAB, LF, CR, 32 and up) is text. */
+    if (s->dyn_ltree[9].Freq || s->dyn_ltree[10].Freq || s->dyn_ltree[13].Freq)
+        return Z_TEXT;
+    for (sym = 32; sym < LITERALS; sym++) {
+        if (s->dyn_ltree[sym].Freq != 0)
+            return Z_TEXT;
+    }
+
+    /* Neither list matched: the stream is empty or holds only tolerated
+     * ("gray-listed") bytes.
+     */
+    return Z_BINARY;
+}
+
+/* ===========================================================================
+ * Reverse the first len bits of a code, using straightforward code (a faster
+ * method would use a table)
+ * IN assertion: 1 <= len <= 15
+ */
+local unsigned bi_reverse(code, len)
+    unsigned code; /* source bits; consumed from the least significant end */
+    int len;       /* number of low-order bits of code to mirror */
+{
+    unsigned mirrored = 0;
+    do {    /* runs at least once; each pass moves one bit across */
+        mirrored = (mirrored | (code & 1)) << 1;
+        code >>= 1;
+    } while (--len > 0);
+    return mirrored >> 1;   /* undo the one surplus shift of the last pass */
+}
+
+/* ===========================================================================
+ * Flush the bit buffer, keeping at most 7 bits in it.
+ */
+local void bi_flush(s)
+    deflate_state *s;
+{
+    if (s->bi_valid < 8) return;        /* less than a byte held: nothing to do */
+    if (s->bi_valid != 16) {            /* 8 or more bits: emit the low byte */
+        put_byte(s, (Byte)s->bi_buf);
+        s->bi_buf >>= 8;
+        s->bi_valid -= 8;
+    } else {                            /* exactly 16 bits: emit and clear all */
+        put_short(s, s->bi_buf);
+        s->bi_buf = s->bi_valid = 0;
+    }
+}
+
+/* ===========================================================================
+ * Flush the bit buffer and align the output on a byte boundary
+ */
+local void bi_windup(s)
+    deflate_state *s;
+{
+    /* Emit what is still buffered: put_byte for 1..8 pending bits,
+     * put_short for more; nothing at all when the buffer is empty. */
+    if (s->bi_valid > 0) {
+        if (s->bi_valid > 8) { put_short(s, s->bi_buf); }
+        else                 { put_byte(s, (Byte)s->bi_buf); }
+    }
+    s->bi_buf = s->bi_valid = 0;
+#ifdef ZLIB_DEBUG
+    s->bits_sent = (s->bits_sent+7) & ~7;
+#endif
+}
diff --git a/libraries/zlib/trees.h b/libraries/zlib/trees.h
new file mode 100644
index 000000000..d35639d82
--- /dev/null
+++ b/libraries/zlib/trees.h
@@ -0,0 +1,128 @@
+/* header created automatically with -DGEN_TREES_H */
+
+local const ct_data static_ltree[L_CODES+2] = {
+{{ 12},{  8}}, {{140},{  8}}, {{ 76},{  8}}, {{204},{  8}}, {{ 44},{  8}},
+{{172},{  8}}, {{108},{  8}}, {{236},{  8}}, {{ 28},{  8}}, {{156},{  8}},
+{{ 92},{  8}}, {{220},{  8}}, {{ 60},{  8}}, {{188},{  8}}, {{124},{  8}},
+{{252},{  8}}, {{  2},{  8}}, {{130},{  8}}, {{ 66},{  8}}, {{194},{  8}},
+{{ 34},{  8}}, {{162},{  8}}, {{ 98},{  8}}, {{226},{  8}}, {{ 18},{  8}},
+{{146},{  8}}, {{ 82},{  8}}, {{210},{  8}}, {{ 50},{  8}}, {{178},{  8}},
+{{114},{  8}}, {{242},{  8}}, {{ 10},{  8}}, {{138},{  8}}, {{ 74},{  8}},
+{{202},{  8}}, {{ 42},{  8}}, {{170},{  8}}, {{106},{  8}}, {{234},{  8}},
+{{ 26},{  8}}, {{154},{  8}}, {{ 90},{  8}}, {{218},{  8}}, {{ 58},{  8}},
+{{186},{  8}}, {{122},{  8}}, {{250},{  8}}, {{  6},{  8}}, {{134},{  8}},
+{{ 70},{  8}}, {{198},{  8}}, {{ 38},{  8}}, {{166},{  8}}, {{102},{  8}},
+{{230},{  8}}, {{ 22},{  8}}, {{150},{  8}}, {{ 86},{  8}}, {{214},{  8}},
+{{ 54},{  8}}, {{182},{  8}}, {{118},{  8}}, {{246},{  8}}, {{ 14},{  8}},
+{{142},{  8}}, {{ 78},{  8}}, {{206},{  8}}, {{ 46},{  8}}, {{174},{  8}},
+{{110},{  8}}, {{238},{  8}}, {{ 30},{  8}}, {{158},{  8}}, {{ 94},{  8}},
+{{222},{  8}}, {{ 62},{  8}}, {{190},{  8}}, {{126},{  8}}, {{254},{  8}},
+{{  1},{  8}}, {{129},{  8}}, {{ 65},{  8}}, {{193},{  8}}, {{ 33},{  8}},
+{{161},{  8}}, {{ 97},{  8}}, {{225},{  8}}, {{ 17},{  8}}, {{145},{  8}},
+{{ 81},{  8}}, {{209},{  8}}, {{ 49},{  8}}, {{177},{  8}}, {{113},{  8}},
+{{241},{  8}}, {{  9},{  8}}, {{137},{  8}}, {{ 73},{  8}}, {{201},{  8}},
+{{ 41},{  8}}, {{169},{  8}}, {{105},{  8}}, {{233},{  8}}, {{ 25},{  8}},
+{{153},{  8}}, {{ 89},{  8}}, {{217},{  8}}, {{ 57},{  8}}, {{185},{  8}},
+{{121},{  8}}, {{249},{  8}}, {{  5},{  8}}, {{133},{  8}}, {{ 69},{  8}},
+{{197},{  8}}, {{ 37},{  8}}, {{165},{  8}}, {{101},{  8}}, {{229},{  8}},
+{{ 21},{  8}}, {{149},{  8}}, {{ 85},{  8}}, {{213},{  8}}, {{ 53},{  8}},
+{{181},{  8}}, {{117},{  8}}, {{245},{  8}}, {{ 13},{  8}}, {{141},{  8}},
+{{ 77},{  8}}, {{205},{  8}}, {{ 45},{  8}}, {{173},{  8}}, {{109},{  8}},
+{{237},{  8}}, {{ 29},{  8}}, {{157},{  8}}, {{ 93},{  8}}, {{221},{  8}},
+{{ 61},{  8}}, {{189},{  8}}, {{125},{  8}}, {{253},{  8}}, {{ 19},{  9}},
+{{275},{  9}}, {{147},{  9}}, {{403},{  9}}, {{ 83},{  9}}, {{339},{  9}},
+{{211},{  9}}, {{467},{  9}}, {{ 51},{  9}}, {{307},{  9}}, {{179},{  9}},
+{{435},{  9}}, {{115},{  9}}, {{371},{  9}}, {{243},{  9}}, {{499},{  9}},
+{{ 11},{  9}}, {{267},{  9}}, {{139},{  9}}, {{395},{  9}}, {{ 75},{  9}},
+{{331},{  9}}, {{203},{  9}}, {{459},{  9}}, {{ 43},{  9}}, {{299},{  9}},
+{{171},{  9}}, {{427},{  9}}, {{107},{  9}}, {{363},{  9}}, {{235},{  9}},
+{{491},{  9}}, {{ 27},{  9}}, {{283},{  9}}, {{155},{  9}}, {{411},{  9}},
+{{ 91},{  9}}, {{347},{  9}}, {{219},{  9}}, {{475},{  9}}, {{ 59},{  9}},
+{{315},{  9}}, {{187},{  9}}, {{443},{  9}}, {{123},{  9}}, {{379},{  9}},
+{{251},{  9}}, {{507},{  9}}, {{  7},{  9}}, {{263},{  9}}, {{135},{  9}},
+{{391},{  9}}, {{ 71},{  9}}, {{327},{  9}}, {{199},{  9}}, {{455},{  9}},
+{{ 39},{  9}}, {{295},{  9}}, {{167},{  9}}, {{423},{  9}}, {{103},{  9}},
+{{359},{  9}}, {{231},{  9}}, {{487},{  9}}, {{ 23},{  9}}, {{279},{  9}},
+{{151},{  9}}, {{407},{  9}}, {{ 87},{  9}}, {{343},{  9}}, {{215},{  9}},
+{{471},{  9}}, {{ 55},{  9}}, {{311},{  9}}, {{183},{  9}}, {{439},{  9}},
+{{119},{  9}}, {{375},{  9}}, {{247},{  9}}, {{503},{  9}}, {{ 15},{  9}},
+{{271},{  9}}, {{143},{  9}}, {{399},{  9}}, {{ 79},{  9}}, {{335},{  9}},
+{{207},{  9}}, {{463},{  9}}, {{ 47},{  9}}, {{303},{  9}}, {{175},{  9}},
+{{431},{  9}}, {{111},{  9}}, {{367},{  9}}, {{239},{  9}}, {{495},{  9}},
+{{ 31},{  9}}, {{287},{  9}}, {{159},{  9}}, {{415},{  9}}, {{ 95},{  9}},
+{{351},{  9}}, {{223},{  9}}, {{479},{  9}}, {{ 63},{  9}}, {{319},{  9}},
+{{191},{  9}}, {{447},{  9}}, {{127},{  9}}, {{383},{  9}}, {{255},{  9}},
+{{511},{  9}}, {{  0},{  7}}, {{ 64},{  7}}, {{ 32},{  7}}, {{ 96},{  7}},
+{{ 16},{  7}}, {{ 80},{  7}}, {{ 48},{  7}}, {{112},{  7}}, {{  8},{  7}},
+{{ 72},{  7}}, {{ 40},{  7}}, {{104},{  7}}, {{ 24},{  7}}, {{ 88},{  7}},
+{{ 56},{  7}}, {{120},{  7}}, {{  4},{  7}}, {{ 68},{  7}}, {{ 36},{  7}},
+{{100},{  7}}, {{ 20},{  7}}, {{ 84},{  7}}, {{ 52},{  7}}, {{116},{  7}},
+{{  3},{  8}}, {{131},{  8}}, {{ 67},{  8}}, {{195},{  8}}, {{ 35},{  8}},
+{{163},{  8}}, {{ 99},{  8}}, {{227},{  8}}
+}; /* 288 fixed literal/length codes; bit lengths 8, 9, 7, 8 per RFC 1951 */
+
+local const ct_data static_dtree[D_CODES] = {
+{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}},
+{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}},
+{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}},
+{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}},
+{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}},
+{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}}
+}; /* 30 fixed distance codes, each 5 bits long, stored bit-reversed */
+
+const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {
+ 0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8,
+ 8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10,
+10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
+13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,  0,  0, 16, 17,
+18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29
+}; /* entries 0..255 hold codes 0..15; entries 258..511 hold codes 16..29 */
+
+const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {
+ 0,  1,  2,  3,  4,  5,  6,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 12, 12,
+13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
+19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
+22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28
+}; /* indexed by match length - MIN_MATCH; yields a length code 0..28 */
+
+local const int base_length[LENGTH_CODES] = {
+0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
+64, 80, 96, 112, 128, 160, 192, 224, 0
+}; /* first (match length - MIN_MATCH) of each length code; entry 28 is 0 */
+
+local const int base_dist[D_CODES] = {
+    0,     1,     2,     3,     4,     6,     8,    12,    16,    24,
+   32,    48,    64,    96,   128,   192,   256,   384,   512,   768,
+ 1024,  1536,  2048,  3072,  4096,  6144,  8192, 12288, 16384, 24576
+}; /* smallest (match distance - 1) belonging to each distance code */
+
diff --git a/libraries/zlib/uncompr.c b/libraries/zlib/uncompr.c
new file mode 100644
index 000000000..f03a1a865
--- /dev/null
+++ b/libraries/zlib/uncompr.c
@@ -0,0 +1,93 @@
+/* uncompr.c -- decompress a memory buffer
+ * Copyright (C) 1995-2003, 2010, 2014, 2016 Jean-loup Gailly, Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#define ZLIB_INTERNAL
+#include "zlib.h"
+
+/* ===========================================================================
+     Decompresses the source buffer into the destination buffer.  *sourceLen is
+   the byte length of the source buffer. Upon entry, *destLen is the total size
+   of the destination buffer, which must be large enough to hold the entire
+   uncompressed data. (The size of the uncompressed data must have been saved
+   previously by the compressor and transmitted to the decompressor by some
+   mechanism outside the scope of this compression library.) Upon exit,
+   *destLen is the size of the decompressed data and *sourceLen is the number
+   of source bytes consumed. Upon return, source + *sourceLen points to the
+   first unused input byte.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer, or
+   Z_DATA_ERROR if the input data was corrupted, including if the input data is
+   an incomplete zlib stream.
+*/
+int ZEXPORT uncompress2 (dest, destLen, source, sourceLen)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong *sourceLen;
+{
+    z_stream strm;
+    int rc;
+    const uInt chunk_max = (uInt)-1;  /* cap on avail_in/avail_out per refill */
+    uLong in_left = *sourceLen;       /* input not yet offered to inflate() */
+    uLong out_left = *destLen;        /* output room not yet offered */
+    Byte probe[1];  /* stand-in output buffer used when *destLen is 0 */
+
+    if (out_left != 0) {
+        *destLen = 0;
+    } else {
+        out_left = 1;
+        dest = probe;
+    }
+
+    strm.zalloc = (alloc_func)0;
+    strm.zfree = (free_func)0;
+    strm.opaque = (voidpf)0;
+    strm.next_in = (z_const Bytef *)source;
+    strm.avail_in = 0;
+
+    rc = inflateInit(&strm);
+    if (rc != Z_OK) return rc;
+
+    strm.next_out = dest;
+    strm.avail_out = 0;
+    for (;;) {  /* refill whichever side ran dry, at most chunk_max at a time */
+        if (strm.avail_out == 0) {
+            strm.avail_out = chunk_max;
+            if (out_left <= (uLong)chunk_max) strm.avail_out = (uInt)out_left;
+            out_left -= strm.avail_out;
+        }
+        if (strm.avail_in == 0) {
+            strm.avail_in = chunk_max;
+            if (in_left <= (uLong)chunk_max) strm.avail_in = (uInt)in_left;
+            in_left -= strm.avail_in;
+        }
+        rc = inflate(&strm, Z_NO_FLUSH);
+        if (rc != Z_OK) break;
+    }
+
+    *sourceLen -= in_left + strm.avail_in;
+    if (dest != probe)
+        *destLen = strm.total_out;
+    else if (strm.total_out && rc == Z_BUF_ERROR)
+        out_left = 1;   /* the probe byte was filled: report a short buffer */
+
+    inflateEnd(&strm);
+    if (rc == Z_STREAM_END) return Z_OK;
+    if (rc == Z_NEED_DICT) return Z_DATA_ERROR;
+    if (rc == Z_BUF_ERROR && out_left + strm.avail_out) return Z_DATA_ERROR;
+    return rc;
+}
+
+int ZEXPORT uncompress (dest, destLen, source, sourceLen)
+    Bytef *dest;          /* destination buffer */
+    uLongf *destLen;      /* in: size of dest; out: size of decompressed data */
+    const Bytef *source;  /* compressed input */
+    uLong sourceLen;      /* bytes in source; uncompress2 updates this copy only */
+{
+    return uncompress2(dest, destLen, source, &sourceLen);
+}
diff --git a/libraries/zlib/win32/zlib.def b/libraries/zlib/win32/zlib.def
new file mode 100644
index 000000000..a2188b000
--- /dev/null
+++ b/libraries/zlib/win32/zlib.def
@@ -0,0 +1,94 @@
+; zlib data compression library
+EXPORTS
+; basic functions
+    zlibVersion
+    deflate
+    deflateEnd
+    inflate
+    inflateEnd
+; advanced functions
+    deflateSetDictionary
+    deflateGetDictionary
+    deflateCopy
+    deflateReset
+    deflateParams
+    deflateTune
+    deflateBound
+    deflatePending
+    deflatePrime
+    deflateSetHeader
+    inflateSetDictionary
+    inflateGetDictionary
+    inflateSync
+    inflateCopy
+    inflateReset
+    inflateReset2
+    inflatePrime
+    inflateMark
+    inflateGetHeader
+    inflateBack
+    inflateBackEnd
+    zlibCompileFlags
+; utility functions
+    compress
+    compress2
+    compressBound
+    uncompress
+    uncompress2
+    gzopen
+    gzdopen
+    gzbuffer
+    gzsetparams
+    gzread
+    gzfread
+    gzwrite
+    gzfwrite
+    gzprintf
+    gzvprintf
+    gzputs
+    gzgets
+    gzputc
+    gzgetc
+    gzungetc
+    gzflush
+    gzseek
+    gzrewind
+    gztell
+    gzoffset
+    gzeof
+    gzdirect
+    gzclose
+    gzclose_r
+    gzclose_w
+    gzerror
+    gzclearerr
+; large file functions
+    gzopen64
+    gzseek64
+    gztell64
+    gzoffset64
+    adler32_combine64
+    crc32_combine64
+; checksum functions
+    adler32
+    adler32_z
+    crc32
+    crc32_z
+    adler32_combine
+    crc32_combine
+; various hacks, don't look :)
+    deflateInit_
+    deflateInit2_
+    inflateInit_
+    inflateInit2_
+    inflateBackInit_
+    gzgetc_
+    zError
+    inflateSyncPoint
+    get_crc_table
+    inflateUndermine
+    inflateValidate
+    inflateCodesUsed
+    inflateResetKeep
+    deflateResetKeep
+    gzopen_w
diff --git a/libraries/zlib/win32/zlib1.rc b/libraries/zlib/win32/zlib1.rc
new file mode 100644
index 000000000..234e641c3
--- /dev/null
+++ b/libraries/zlib/win32/zlib1.rc
@@ -0,0 +1,40 @@
+#include <winver.h>
+#include "../zlib.h"
+
+#ifdef GCC_WINDRES
+VS_VERSION_INFO		VERSIONINFO
+#else
+VS_VERSION_INFO		VERSIONINFO	MOVEABLE IMPURE LOADONCALL DISCARDABLE
+#endif
+  FILEVERSION		ZLIB_VER_MAJOR,ZLIB_VER_MINOR,ZLIB_VER_REVISION,0
+  PRODUCTVERSION	ZLIB_VER_MAJOR,ZLIB_VER_MINOR,ZLIB_VER_REVISION,0
+  FILEFLAGSMASK		VS_FFI_FILEFLAGSMASK
+#ifdef _DEBUG
+  FILEFLAGS		1
+#else
+  FILEFLAGS		0
+#endif
+  FILEOS		VOS__WINDOWS32
+  FILETYPE		VFT_DLL
+  FILESUBTYPE		0	// not used
+BEGIN
+  BLOCK "StringFileInfo"
+  BEGIN
+    BLOCK "040904E4"
+    //language ID = U.S. English, char set = Windows, Multilingual
+    BEGIN
+      VALUE "FileDescription",	"zlib data compression library\0"
+      VALUE "FileVersion",	ZLIB_VERSION "\0"
+      VALUE "InternalName",	"zlib1.dll\0"
+      VALUE "LegalCopyright",	"(C) 1995-2017 Jean-loup Gailly & Mark Adler\0"
+      VALUE "OriginalFilename",	"zlib1.dll\0"
+      VALUE "ProductName",	"zlib\0"
+      VALUE "ProductVersion",	ZLIB_VERSION "\0"
+      VALUE "Comments",		"For more information visit http://www.zlib.net/\0"
+    END
+  END
+  BLOCK "VarFileInfo"
+  BEGIN
+    VALUE "Translation", 0x0409, 1252
+  END
+END
diff --git a/libraries/zlib/zconf.h b/libraries/zlib/zconf.h
new file mode 100644
index 000000000..5e1d68a00
--- /dev/null
+++ b/libraries/zlib/zconf.h
@@ -0,0 +1,534 @@
+/* zconf.h -- configuration of the zlib compression library
+ * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#ifndef ZCONF_H
+#define ZCONF_H
+
+/*
+ * If you *really* need a unique prefix for all types and library functions,
+ * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
+ * Even better than compiling with -DZ_PREFIX would be to use configure to set
+ * this permanently in zconf.h using "./configure --zprefix".
+ */
+#ifdef Z_PREFIX     /* may be set to #if 1 by ./configure */
+#  define Z_PREFIX_SET
+
+/* all linked symbols and init macros */
+#  define _dist_code            z__dist_code
+#  define _length_code          z__length_code
+#  define _tr_align             z__tr_align
+#  define _tr_flush_bits        z__tr_flush_bits
+#  define _tr_flush_block       z__tr_flush_block
+#  define _tr_init              z__tr_init
+#  define _tr_stored_block      z__tr_stored_block
+#  define _tr_tally             z__tr_tally
+#  define adler32               z_adler32
+#  define adler32_combine       z_adler32_combine
+#  define adler32_combine64     z_adler32_combine64
+#  define adler32_z             z_adler32_z
+#  ifndef Z_SOLO
+#    define compress              z_compress
+#    define compress2             z_compress2
+#    define compressBound         z_compressBound
+#  endif
+#  define crc32                 z_crc32
+#  define crc32_combine         z_crc32_combine
+#  define crc32_combine64       z_crc32_combine64
+#  define crc32_z               z_crc32_z
+#  define deflate               z_deflate
+#  define deflateBound          z_deflateBound
+#  define deflateCopy           z_deflateCopy
+#  define deflateEnd            z_deflateEnd
+#  define deflateGetDictionary  z_deflateGetDictionary
+#  define deflateInit           z_deflateInit
+#  define deflateInit2          z_deflateInit2
+#  define deflateInit2_         z_deflateInit2_
+#  define deflateInit_          z_deflateInit_
+#  define deflateParams         z_deflateParams
+#  define deflatePending        z_deflatePending
+#  define deflatePrime          z_deflatePrime
+#  define deflateReset          z_deflateReset
+#  define deflateResetKeep      z_deflateResetKeep
+#  define deflateSetDictionary  z_deflateSetDictionary
+#  define deflateSetHeader      z_deflateSetHeader
+#  define deflateTune           z_deflateTune
+#  define deflate_copyright     z_deflate_copyright
+#  define get_crc_table         z_get_crc_table
+#  ifndef Z_SOLO
+#    define gz_error              z_gz_error
+#    define gz_intmax             z_gz_intmax
+#    define gz_strwinerror        z_gz_strwinerror
+#    define gzbuffer              z_gzbuffer
+#    define gzclearerr            z_gzclearerr
+#    define gzclose               z_gzclose
+#    define gzclose_r             z_gzclose_r
+#    define gzclose_w             z_gzclose_w
+#    define gzdirect              z_gzdirect
+#    define gzdopen               z_gzdopen
+#    define gzeof                 z_gzeof
+#    define gzerror               z_gzerror
+#    define gzflush               z_gzflush
+#    define gzfread               z_gzfread
+#    define gzfwrite              z_gzfwrite
+#    define gzgetc                z_gzgetc
+#    define gzgetc_               z_gzgetc_
+#    define gzgets                z_gzgets
+#    define gzoffset              z_gzoffset
+#    define gzoffset64            z_gzoffset64
+#    define gzopen                z_gzopen
+#    define gzopen64              z_gzopen64
+#    ifdef _WIN32
+#      define gzopen_w              z_gzopen_w
+#    endif
+#    define gzprintf              z_gzprintf
+#    define gzputc                z_gzputc
+#    define gzputs                z_gzputs
+#    define gzread                z_gzread
+#    define gzrewind              z_gzrewind
+#    define gzseek                z_gzseek
+#    define gzseek64              z_gzseek64
+#    define gzsetparams           z_gzsetparams
+#    define gztell                z_gztell
+#    define gztell64              z_gztell64
+#    define gzungetc              z_gzungetc
+#    define gzvprintf             z_gzvprintf
+#    define gzwrite               z_gzwrite
+#  endif
+#  define inflate               z_inflate
+#  define inflateBack           z_inflateBack
+#  define inflateBackEnd        z_inflateBackEnd
+#  define inflateBackInit       z_inflateBackInit
+#  define inflateBackInit_      z_inflateBackInit_
+#  define inflateCodesUsed      z_inflateCodesUsed
+#  define inflateCopy           z_inflateCopy
+#  define inflateEnd            z_inflateEnd
+#  define inflateGetDictionary  z_inflateGetDictionary
+#  define inflateGetHeader      z_inflateGetHeader
+#  define inflateInit           z_inflateInit
+#  define inflateInit2          z_inflateInit2
+#  define inflateInit2_         z_inflateInit2_
+#  define inflateInit_          z_inflateInit_
+#  define inflateMark           z_inflateMark
+#  define inflatePrime          z_inflatePrime
+#  define inflateReset          z_inflateReset
+#  define inflateReset2         z_inflateReset2
+#  define inflateResetKeep      z_inflateResetKeep
+#  define inflateSetDictionary  z_inflateSetDictionary
+#  define inflateSync           z_inflateSync
+#  define inflateSyncPoint      z_inflateSyncPoint
+#  define inflateUndermine      z_inflateUndermine
+#  define inflateValidate       z_inflateValidate
+#  define inflate_copyright     z_inflate_copyright
+#  define inflate_fast          z_inflate_fast
+#  define inflate_table         z_inflate_table
+#  ifndef Z_SOLO
+#    define uncompress            z_uncompress
+#    define uncompress2           z_uncompress2
+#  endif
+#  define zError                z_zError
+#  ifndef Z_SOLO
+#    define zcalloc               z_zcalloc
+#    define zcfree                z_zcfree
+#  endif
+#  define zlibCompileFlags      z_zlibCompileFlags
+#  define zlibVersion           z_zlibVersion
+
+/* all zlib typedefs in zlib.h and zconf.h */
+#  define Byte                  z_Byte
+#  define Bytef                 z_Bytef
+#  define alloc_func            z_alloc_func
+#  define charf                 z_charf
+#  define free_func             z_free_func
+#  ifndef Z_SOLO
+#    define gzFile                z_gzFile
+#  endif
+#  define gz_header             z_gz_header
+#  define gz_headerp            z_gz_headerp
+#  define in_func               z_in_func
+#  define intf                  z_intf
+#  define out_func              z_out_func
+#  define uInt                  z_uInt
+#  define uIntf                 z_uIntf
+#  define uLong                 z_uLong
+#  define uLongf                z_uLongf
+#  define voidp                 z_voidp
+#  define voidpc                z_voidpc
+#  define voidpf                z_voidpf
+
+/* all zlib structs in zlib.h and zconf.h */
+#  define gz_header_s           z_gz_header_s
+#  define internal_state        z_internal_state
+
+#endif
+
+#if defined(__MSDOS__) && !defined(MSDOS)
+#  define MSDOS
+#endif
+#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2)
+#  define OS2
+#endif
+#if defined(_WINDOWS) && !defined(WINDOWS)
+#  define WINDOWS
+#endif
+#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__)
+#  ifndef WIN32
+#    define WIN32
+#  endif
+#endif
+#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32)
+#  if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__)
+#    ifndef SYS16BIT
+#      define SYS16BIT
+#    endif
+#  endif
+#endif
+
+/*
+ * Compile with -DMAXSEG_64K if the alloc function cannot allocate more
+ * than 64k bytes at a time (needed on systems with 16-bit int).
+ */
+#ifdef SYS16BIT
+#  define MAXSEG_64K
+#endif
+#ifdef MSDOS
+#  define UNALIGNED_OK
+#endif
+
+#ifdef __STDC_VERSION__
+#  ifndef STDC
+#    define STDC
+#  endif
+#  if __STDC_VERSION__ >= 199901L
+#    ifndef STDC99
+#      define STDC99
+#    endif
+#  endif
+#endif
+#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__))
+#  define STDC
+#endif
+
+#if defined(__OS400__) && !defined(STDC)    /* iSeries (formerly AS/400). */
+#  define STDC
+#endif
+
+#ifndef STDC
+#  ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
+#    define const       /* note: need a more gentle solution here */
+#  endif
+#endif
+
+#if defined(ZLIB_CONST) && !defined(z_const)
+#  define z_const const
+#else
+#  define z_const
+#endif
+
+#ifdef Z_SOLO
+   typedef unsigned long z_size_t;
+#else
+#  define z_longlong long long
+#  if defined(NO_SIZE_T)
+     typedef unsigned NO_SIZE_T z_size_t;
+#  elif defined(STDC)
+#    include <stddef.h>
+     typedef size_t z_size_t;
+#  else
+     typedef unsigned long z_size_t;
+#  endif
+#  undef z_longlong
+#endif
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+#  ifdef MAXSEG_64K
+#    define MAX_MEM_LEVEL 8
+#  else
+#    define MAX_MEM_LEVEL 9
+#  endif
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2.
+ * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
+ * created by gzip. (Files created by minigzip can still be extracted by
+ * gzip.)
+ */
+#ifndef MAX_WBITS
+#  define MAX_WBITS   15 /* 32K LZ77 window */
+#endif
+
+/* The memory requirements for deflate are (in bytes):
+            (1 << (windowBits+2)) +  (1 << (memLevel+9))
+ that is: 128K for windowBits=15  +  128K for memLevel = 8  (default values)
+ plus a few kilobytes for small objects. For example, if you want to reduce
+ the default memory requirements from 256K to 128K, compile with
+     make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+ Of course this will generally degrade compression (there's no free lunch).
+
+   The memory requirements for inflate are (in bytes) 1 << windowBits
+ that is, 32K for windowBits=15 (default value) plus about 7 kilobytes
+ for small objects.
+*/
+
+                        /* Type declarations */
+
+#ifndef OF /* function prototypes */
+#  ifdef STDC
+#    define OF(args)  args
+#  else
+#    define OF(args)  ()
+#  endif
+#endif
+
+#ifndef Z_ARG /* function prototypes for stdarg */
+#  if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#    define Z_ARG(args)  args
+#  else
+#    define Z_ARG(args)  ()
+#  endif
+#endif
+
+/* The following definitions for FAR are needed only for MSDOS mixed
+ * model programming (small or medium model with some far allocations).
+ * This was tested only with MSC; for other MSDOS compilers you may have
+ * to define NO_MEMCPY in zutil.h.  If you don't need the mixed model,
+ * just define FAR to be empty.
+ */
+#ifdef SYS16BIT
+#  if defined(M_I86SM) || defined(M_I86MM)
+     /* MSC small or medium model */
+#    define SMALL_MEDIUM
+#    ifdef _MSC_VER
+#      define FAR _far
+#    else
+#      define FAR far
+#    endif
+#  endif
+#  if (defined(__SMALL__) || defined(__MEDIUM__))
+     /* Turbo C small or medium model */
+#    define SMALL_MEDIUM
+#    ifdef __BORLANDC__
+#      define FAR _far
+#    else
+#      define FAR far
+#    endif
+#  endif
+#endif
+
+#if defined(WINDOWS) || defined(WIN32)
+   /* If building or using zlib as a DLL, define ZLIB_DLL.
+    * This is not mandatory, but it offers a little performance increase.
+    */
+#  ifdef ZLIB_DLL
+#    if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
+#      ifdef ZLIB_INTERNAL
+#        define ZEXTERN extern __declspec(dllexport)
+#      else
+#        define ZEXTERN extern __declspec(dllimport)
+#      endif
+#    endif
+#  endif  /* ZLIB_DLL */
+   /* If building or using zlib with the WINAPI/WINAPIV calling convention,
+    * define ZLIB_WINAPI.
+    * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
+    */
+#  ifdef ZLIB_WINAPI
+#    ifdef FAR
+#      undef FAR
+#    endif
+#    include <windows.h>
+     /* No need for _export, use ZLIB.DEF instead. */
+     /* For complete Windows compatibility, use WINAPI, not __stdcall. */
+#    define ZEXPORT WINAPI
+#    ifdef WIN32
+#      define ZEXPORTVA WINAPIV
+#    else
+#      define ZEXPORTVA FAR CDECL
+#    endif
+#  endif
+#endif
+
+#if defined (__BEOS__)
+#  ifdef ZLIB_DLL
+#    ifdef ZLIB_INTERNAL
+#      define ZEXPORT   __declspec(dllexport)
+#      define ZEXPORTVA __declspec(dllexport)
+#    else
+#      define ZEXPORT   __declspec(dllimport)
+#      define ZEXPORTVA __declspec(dllimport)
+#    endif
+#  endif
+#endif
+
+#ifndef ZEXTERN
+#  define ZEXTERN extern
+#endif
+#ifndef ZEXPORT
+#  define ZEXPORT
+#endif
+#ifndef ZEXPORTVA
+#  define ZEXPORTVA
+#endif
+
+#ifndef FAR
+#  define FAR
+#endif
+
+#if !defined(__MACTYPES__)
+typedef unsigned char  Byte;  /* 8 bits */
+#endif
+typedef unsigned int   uInt;  /* 16 bits or more */
+typedef unsigned long  uLong; /* 32 bits or more */
+
+#ifdef SMALL_MEDIUM
+   /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
+#  define Bytef Byte FAR
+#else
+   typedef Byte  FAR Bytef;
+#endif
+typedef char  FAR charf;
+typedef int   FAR intf;
+typedef uInt  FAR uIntf;
+typedef uLong FAR uLongf;
+
+#ifdef STDC
+   typedef void const *voidpc;
+   typedef void FAR   *voidpf;
+   typedef void       *voidp;
+#else
+   typedef Byte const *voidpc;
+   typedef Byte FAR   *voidpf;
+   typedef Byte       *voidp;
+#endif
+
+#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC)
+#  include <limits.h>
+#  if (UINT_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned
+#  elif (ULONG_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned long
+#  elif (USHRT_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned short
+#  endif
+#endif
+
+#ifdef Z_U4
+   typedef Z_U4 z_crc_t;
+#else
+   typedef unsigned long z_crc_t;
+#endif
+
+#ifdef HAVE_UNISTD_H    /* may be set to #if 1 by ./configure */
+#  define Z_HAVE_UNISTD_H
+#endif
+
+#ifdef HAVE_STDARG_H    /* may be set to #if 1 by ./configure */
+#  define Z_HAVE_STDARG_H
+#endif
+
+#ifdef STDC
+#  ifndef Z_SOLO
+#    include <sys/types.h>      /* for off_t */
+#  endif
+#endif
+
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#  ifndef Z_SOLO
+#    include <stdarg.h>         /* for va_list */
+#  endif
+#endif
+
+#ifdef _WIN32
+#  ifndef Z_SOLO
+#    include <stddef.h>         /* for wchar_t */
+#  endif
+#endif
+
+/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
+ * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even
+ * though the former does not conform to the LFS document), but considering
+ * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
+ * equivalently requesting no 64-bit operations
+ */
+#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
+#  undef _LARGEFILE64_SOURCE
+#endif
+
+#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H)
+#  define Z_HAVE_UNISTD_H
+#endif
+#ifndef Z_SOLO
+#  if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE)
+#    include <unistd.h>         /* for SEEK_*, off_t, and _LFS64_LARGEFILE */
+#    ifdef VMS
+#      include <unixio.h>       /* for off_t */
+#    endif
+#    ifndef z_off_t
+#      define z_off_t off_t
+#    endif
+#  endif
+#endif
+
+#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0
+#  define Z_LFS64
+#endif
+
+#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64)
+#  define Z_LARGE64
+#endif
+
+#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64)
+#  define Z_WANT64
+#endif
+
+#if !defined(SEEK_SET) && !defined(Z_SOLO)
+#  define SEEK_SET        0       /* Seek from beginning of file.  */
+#  define SEEK_CUR        1       /* Seek from current position.  */
+#  define SEEK_END        2       /* Set file pointer to EOF plus "offset" */
+#endif
+
+#ifndef z_off_t
+#  define z_off_t long
+#endif
+
+#if !defined(_WIN32) && defined(Z_LARGE64)
+#  define z_off64_t off64_t
+#else
+#  if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO)
+#    define z_off64_t __int64
+#  else
+#    define z_off64_t z_off_t
+#  endif
+#endif
+
+/* MVS linker does not support external names larger than 8 bytes */
+#if defined(__MVS__)
+  #pragma map(deflateInit_,"DEIN")
+  #pragma map(deflateInit2_,"DEIN2")
+  #pragma map(deflateEnd,"DEEND")
+  #pragma map(deflateBound,"DEBND")
+  #pragma map(inflateInit_,"ININ")
+  #pragma map(inflateInit2_,"ININ2")
+  #pragma map(inflateEnd,"INEND")
+  #pragma map(inflateSync,"INSY")
+  #pragma map(inflateSetDictionary,"INSEDI")
+  #pragma map(compressBound,"CMBND")
+  #pragma map(inflate_table,"INTABL")
+  #pragma map(inflate_fast,"INFA")
+  #pragma map(inflate_copyright,"INCOPY")
+#endif
+
+#endif /* ZCONF_H */
diff --git a/libraries/zlib/zlib.3 b/libraries/zlib/zlib.3
new file mode 100644
index 000000000..bda4eb073
--- /dev/null
+++ b/libraries/zlib/zlib.3
@@ -0,0 +1,149 @@
+.TH ZLIB 3 "15 Jan 2017"
+.SH NAME
+zlib \- compression/decompression library
+.SH SYNOPSIS
+[see
+.I zlib.h
+for full description]
+.SH DESCRIPTION
+The
+.I zlib
+library is a general purpose data compression library.
+The code is thread safe, assuming that the standard library functions
+used are thread safe, such as memory allocation routines.
+It provides in-memory compression and decompression functions,
+including integrity checks of the uncompressed data.
+This version of the library supports only one compression method (deflation)
+but other algorithms may be added later
+with the same stream interface.
+.LP
+Compression can be done in a single step if the buffers are large enough
+or can be done by repeated calls of the compression function.
+In the latter case,
+the application must provide more input and/or consume the output
+(providing more output space) before each call.
+.LP
+The library also supports reading and writing files in
+.IR gzip (1)
+(.gz) format
+with an interface similar to that of stdio.
+.LP
+The library does not install any signal handler.
+The decoder checks the consistency of the compressed data,
+so the library should never crash even in the case of corrupted input.
+.LP
+All functions of the compression library are documented in the file
+.IR zlib.h .
+The distribution source includes examples of use of the library
+in the files
+.I test/example.c
+and
+.IR test/minigzip.c ,
+as well as other examples in the
+.IR examples/
+directory.
+.LP
+Changes to this version are documented in the file
+.I ChangeLog
+that accompanies the source.
+.LP
+.I zlib
+is built into many languages and operating systems, including but not limited to
+Java, Python, .NET, PHP, Perl, Ruby, Swift, and Go.
+.LP
+An experimental package to read and write files in the .zip format,
+written on top of
+.I zlib
+by Gilles Vollant (info@winimage.com),
+is available at:
+.IP
+http://www.winimage.com/zLibDll/minizip.html
+and also in the
+.I contrib/minizip
+directory of the main
+.I zlib
+source distribution.
+.SH "SEE ALSO"
+The
+.I zlib
+web site can be found at:
+.IP
+http://zlib.net/
+.LP
+The data format used by the
+.I zlib
+library is described by RFCs
+(Request for Comments) 1950 to 1952 in the files:
+.IP
+http://tools.ietf.org/html/rfc1950 (for the zlib header and trailer format)
+.br
+http://tools.ietf.org/html/rfc1951 (for the deflate compressed data format)
+.br
+http://tools.ietf.org/html/rfc1952 (for the gzip header and trailer format)
+.LP
+Mark Nelson wrote an article about
+.I zlib
+for the Jan. 1997 issue of Dr. Dobb's Journal;
+a copy of the article is available at:
+.IP
+http://marknelson.us/1997/01/01/zlib-engine/
+.SH "REPORTING PROBLEMS"
+Before reporting a problem,
+please check the
+.I zlib
+web site to verify that you have the latest version of
+.IR zlib ;
+otherwise,
+obtain the latest version and see if the problem still exists.
+Please read the
+.I zlib
+FAQ at:
+.IP
+http://zlib.net/zlib_faq.html
+.LP
+before asking for help.
+Send questions and/or comments to zlib@gzip.org,
+or (for the Windows DLL version) to Gilles Vollant (info@winimage.com).
+.SH AUTHORS AND LICENSE
+Version 1.2.11
+.LP
+Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
+.LP
+This software is provided 'as-is', without any express or implied
+warranty.  In no event will the authors be held liable for any damages
+arising from the use of this software.
+.LP
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+.LP
+.nr step 1 1
+.IP \n[step]. 3
+The origin of this software must not be misrepresented; you must not
+claim that you wrote the original software. If you use this software
+in a product, an acknowledgment in the product documentation would be
+appreciated but is not required.
+.IP \n+[step].
+Altered source versions must be plainly marked as such, and must not be
+misrepresented as being the original software.
+.IP \n+[step].
+This notice may not be removed or altered from any source distribution.
+.LP
+Jean-loup Gailly        Mark Adler
+.br
+jloup@gzip.org          madler@alumni.caltech.edu
+.LP
+The deflate format used by
+.I zlib
+was defined by Phil Katz.
+The deflate and
+.I zlib
+specifications were written by L. Peter Deutsch.
+Thanks to all the people who reported problems and suggested various
+improvements in
+.IR zlib ,
+who are too numerous to cite here.
+.LP
+UNIX manual page by R. P. C. Rodgers,
+U.S. National Library of Medicine (rodgers@nlm.nih.gov).
+.\" end of man page
diff --git a/libraries/zlib/zlib.3.pdf b/libraries/zlib/zlib.3.pdf
new file mode 100644
index 000000000..6fa519c5b
Binary files /dev/null and b/libraries/zlib/zlib.3.pdf differ
diff --git a/libraries/zlib/zlib.h b/libraries/zlib/zlib.h
new file mode 100644
index 000000000..f09cdaf1e
--- /dev/null
+++ b/libraries/zlib/zlib.h
@@ -0,0 +1,1912 @@
+/* zlib.h -- interface of the 'zlib' general purpose compression library
+  version 1.2.11, January 15th, 2017
+
+  Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Jean-loup Gailly        Mark Adler
+  jloup@gzip.org          madler@alumni.caltech.edu
+
+
+  The data format used by the zlib library is described by RFCs (Request for
+  Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
+  (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
+*/
+
+#ifndef ZLIB_H
+#define ZLIB_H
+
+#include "zconf.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ZLIB_VERSION "1.2.11"
+#define ZLIB_VERNUM 0x12b0
+#define ZLIB_VER_MAJOR 1
+#define ZLIB_VER_MINOR 2
+#define ZLIB_VER_REVISION 11
+#define ZLIB_VER_SUBREVISION 0
+
+/*
+    The 'zlib' compression library provides in-memory compression and
+  decompression functions, including integrity checks of the uncompressed data.
+  This version of the library supports only one compression method (deflation)
+  but other algorithms will be added later and will have the same stream
+  interface.
+
+    Compression can be done in a single step if the buffers are large enough,
+  or can be done by repeated calls of the compression function.  In the latter
+  case, the application must provide more input and/or consume the output
+  (providing more output space) before each call.
+
+    The compressed data format used by default by the in-memory functions is
+  the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped
+  around a deflate stream, which is itself documented in RFC 1951.
+
+    The library also supports reading and writing files in gzip (.gz) format
+  with an interface similar to that of stdio using the functions that start
+  with "gz".  The gzip format is different from the zlib format.  gzip is a
+  gzip wrapper, documented in RFC 1952, wrapped around a deflate stream.
+
+    This library can optionally read and write gzip and raw deflate streams in
+  memory as well.
+
+    The zlib format was designed to be compact and fast for use in memory
+  and on communications channels.  The gzip format was designed for single-
+  file compression on file systems, has a larger header than zlib to maintain
+  directory information, and uses a different, slower check method than zlib.
+
+    The library does not install any signal handler.  The decoder checks
+  the consistency of the compressed data, so the library should never crash
+  even in the case of corrupted input.
+*/
+
+typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
+typedef void   (*free_func)  OF((voidpf opaque, voidpf address));
+
+struct internal_state;
+
+typedef struct z_stream_s {
+    z_const Bytef *next_in;     /* next input byte */
+    uInt     avail_in;  /* number of bytes available at next_in */
+    uLong    total_in;  /* total number of input bytes read so far */
+
+    Bytef    *next_out; /* next output byte will go here */
+    uInt     avail_out; /* remaining free space at next_out */
+    uLong    total_out; /* total number of bytes output so far */
+
+    z_const char *msg;  /* last error message, NULL if no error */
+    struct internal_state FAR *state; /* not visible by applications */
+
+    alloc_func zalloc;  /* used to allocate the internal state */
+    free_func  zfree;   /* used to free the internal state */
+    voidpf     opaque;  /* private data object passed to zalloc and zfree */
+
+    int     data_type;  /* best guess about the data type: binary or text
+                           for deflate, or the decoding state for inflate */
+    uLong   adler;      /* Adler-32 or CRC-32 value of the uncompressed data */
+    uLong   reserved;   /* reserved for future use */
+} z_stream;
+
+typedef z_stream FAR *z_streamp;
+
+/*
+     gzip header information passed to and from zlib routines.  See RFC 1952
+  for more details on the meanings of these fields.
+*/
+typedef struct gz_header_s {
+    int     text;       /* true if compressed data believed to be text */
+    uLong   time;       /* modification time */
+    int     xflags;     /* extra flags (not used when writing a gzip file) */
+    int     os;         /* operating system */
+    Bytef   *extra;     /* pointer to extra field or Z_NULL if none */
+    uInt    extra_len;  /* extra field length (valid if extra != Z_NULL) */
+    uInt    extra_max;  /* space at extra (only when reading header) */
+    Bytef   *name;      /* pointer to zero-terminated file name or Z_NULL */
+    uInt    name_max;   /* space at name (only when reading header) */
+    Bytef   *comment;   /* pointer to zero-terminated comment or Z_NULL */
+    uInt    comm_max;   /* space at comment (only when reading header) */
+    int     hcrc;       /* true if there was or will be a header crc */
+    int     done;       /* true when done reading gzip header (not used
+                           when writing a gzip file) */
+} gz_header;
+
+typedef gz_header FAR *gz_headerp;
+
+/*
+     The application must update next_in and avail_in when avail_in has dropped
+   to zero.  It must update next_out and avail_out when avail_out has dropped
+   to zero.  The application must initialize zalloc, zfree and opaque before
+   calling the init function.  All other fields are set by the compression
+   library and must not be updated by the application.
+
+     The opaque value provided by the application will be passed as the first
+   parameter for calls of zalloc and zfree.  This can be useful for custom
+   memory management.  The compression library attaches no meaning to the
+   opaque value.
+
+     zalloc must return Z_NULL if there is not enough memory for the object.
+   If zlib is used in a multi-threaded application, zalloc and zfree must be
+   thread safe.  In that case, zlib is thread-safe.  When zalloc and zfree are
+   Z_NULL on entry to the initialization function, they are set to internal
+   routines that use the standard library functions malloc() and free().
+
+     On 16-bit systems, the functions zalloc and zfree must be able to allocate
+   exactly 65536 bytes, but will not be required to allocate more than this if
+   the symbol MAXSEG_64K is defined (see zconf.h).  WARNING: On MSDOS, pointers
+   returned by zalloc for objects of exactly 65536 bytes *must* have their
+   offset normalized to zero.  The default allocation function provided by this
+   library ensures this (see zutil.c).  To reduce memory requirements and avoid
+   any allocation of 64K objects, at the expense of compression ratio, compile
+   the library with -DMAX_WBITS=14 (see zconf.h).
+
+     The fields total_in and total_out can be used for statistics or progress
+   reports.  After compression, total_in holds the total size of the
+   uncompressed data and may be saved for use by the decompressor (particularly
+   if the decompressor wants to decompress everything in a single step).
+*/
+
+                        /* constants */
+
+#define Z_NO_FLUSH      0
+#define Z_PARTIAL_FLUSH 1
+#define Z_SYNC_FLUSH    2
+#define Z_FULL_FLUSH    3
+#define Z_FINISH        4
+#define Z_BLOCK         5
+#define Z_TREES         6
+/* Allowed flush values; see deflate() and inflate() below for details */
+
+#define Z_OK            0
+#define Z_STREAM_END    1
+#define Z_NEED_DICT     2
+#define Z_ERRNO        (-1)
+#define Z_STREAM_ERROR (-2)
+#define Z_DATA_ERROR   (-3)
+#define Z_MEM_ERROR    (-4)
+#define Z_BUF_ERROR    (-5)
+#define Z_VERSION_ERROR (-6)
+/* Return codes for the compression/decompression functions. Negative values
+ * are errors, positive values are used for special but normal events.
+ */
+
+#define Z_NO_COMPRESSION         0
+#define Z_BEST_SPEED             1
+#define Z_BEST_COMPRESSION       9
+#define Z_DEFAULT_COMPRESSION  (-1)
+/* compression levels */
+
+#define Z_FILTERED            1
+#define Z_HUFFMAN_ONLY        2
+#define Z_RLE                 3
+#define Z_FIXED               4
+#define Z_DEFAULT_STRATEGY    0
+/* compression strategy; see deflateInit2() below for details */
+
+#define Z_BINARY   0
+#define Z_TEXT     1
+#define Z_ASCII    Z_TEXT   /* for compatibility with 1.2.2 and earlier */
+#define Z_UNKNOWN  2
+/* Possible values of the data_type field for deflate() */
+
+#define Z_DEFLATED   8
+/* The deflate compression method (the only one supported in this version) */
+
+#define Z_NULL  0  /* for initializing zalloc, zfree, opaque */
+
+#define zlib_version zlibVersion()
+/* for compatibility with versions < 1.0.2 */
+
+
+                        /* basic functions */
+
+ZEXTERN const char * ZEXPORT zlibVersion OF((void));
+/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
+   If the first character differs, the library code actually used is not
+   compatible with the zlib.h header file used by the application.  This check
+   is automatically made by deflateInit and inflateInit.
+ */
+
+/*
+ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
+
+     Initializes the internal stream state for compression.  The fields
+   zalloc, zfree and opaque must be initialized before by the caller.  If
+   zalloc and zfree are set to Z_NULL, deflateInit updates them to use default
+   allocation functions.
+
+     The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
+   1 gives best speed, 9 gives best compression, 0 gives no compression at all
+   (the input data is simply copied a block at a time).  Z_DEFAULT_COMPRESSION
+   requests a default compromise between speed and compression (currently
+   equivalent to level 6).
+
+     deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_STREAM_ERROR if level is not a valid compression level, or
+   Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
+   with the version assumed by the caller (ZLIB_VERSION).  msg is set to null
+   if there is no error message.  deflateInit does not perform any compression:
+   this will be done by deflate().
+*/
+
+
+ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
+/*
+    deflate compresses as much data as possible, and stops when the input
+  buffer becomes empty or the output buffer becomes full.  It may introduce
+  some output latency (reading input without producing any output) except when
+  forced to flush.
+
+    The detailed semantics are as follows.  deflate performs one or both of the
+  following actions:
+
+  - Compress more input starting at next_in and update next_in and avail_in
+    accordingly.  If not all input can be processed (because there is not
+    enough room in the output buffer), next_in and avail_in are updated and
+    processing will resume at this point for the next call of deflate().
+
+  - Generate more output starting at next_out and update next_out and avail_out
+    accordingly.  This action is forced if the parameter flush is non zero.
+    Forcing flush frequently degrades the compression ratio, so this parameter
+    should be set only when necessary.  Some output may be provided even if
+    flush is zero.
+
+    Before the call of deflate(), the application should ensure that at least
+  one of the actions is possible, by providing more input and/or consuming more
+  output, and updating avail_in or avail_out accordingly; avail_out should
+  never be zero before the call.  The application can consume the compressed
+  output when it wants, for example when the output buffer is full (avail_out
+  == 0), or after each call of deflate().  If deflate returns Z_OK and with
+  zero avail_out, it must be called again after making room in the output
+  buffer because there might be more output pending. See deflatePending(),
+  which can be used if desired to determine whether or not there is more output
+  in that case.
+
+    Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to
+  decide how much data to accumulate before producing output, in order to
+  maximize compression.
+
+    If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
+  flushed to the output buffer and the output is aligned on a byte boundary, so
+  that the decompressor can get all input data available so far.  (In
+  particular avail_in is zero after the call if enough output space has been
+  provided before the call.) Flushing may degrade compression for some
+  compression algorithms and so it should be used only when necessary.  This
+  completes the current deflate block and follows it with an empty stored block
+  that is three bits plus filler bits to the next byte, followed by four bytes
+  (00 00 ff ff).
+
+    If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the
+  output buffer, but the output is not aligned to a byte boundary.  All of the
+  input data so far will be available to the decompressor, as for Z_SYNC_FLUSH.
+  This completes the current deflate block and follows it with an empty fixed
+  codes block that is 10 bits long.  This assures that enough bytes are output
+  in order for the decompressor to finish the block before the empty fixed
+  codes block.
+
+    If flush is set to Z_BLOCK, a deflate block is completed and emitted, as
+  for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to
+  seven bits of the current block are held to be written as the next byte after
+  the next deflate block is completed.  In this case, the decompressor may not
+  be provided enough bits at this point in order to complete decompression of
+  the data provided so far to the compressor.  It may need to wait for the next
+  block to be emitted.  This is for advanced applications that need to control
+  the emission of deflate blocks.
+
+    If flush is set to Z_FULL_FLUSH, all output is flushed as with
+  Z_SYNC_FLUSH, and the compression state is reset so that decompression can
+  restart from this point if previous compressed data has been damaged or if
+  random access is desired.  Using Z_FULL_FLUSH too often can seriously degrade
+  compression.
+
+    If deflate returns with avail_out == 0, this function must be called again
+  with the same value of the flush parameter and more output space (updated
+  avail_out), until the flush is complete (deflate returns with non-zero
+  avail_out).  In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
+  avail_out is greater than six to avoid repeated flush markers due to
+  avail_out == 0 on return.
+
+    If the parameter flush is set to Z_FINISH, pending input is processed,
+  pending output is flushed and deflate returns with Z_STREAM_END if there was
+  enough output space.  If deflate returns with Z_OK or Z_BUF_ERROR, this
+  function must be called again with Z_FINISH and more output space (updated
+  avail_out) but no more input data, until it returns with Z_STREAM_END or an
+  error.  After deflate has returned Z_STREAM_END, the only possible operations
+  on the stream are deflateReset or deflateEnd.
+
+    Z_FINISH can be used in the first deflate call after deflateInit if all the
+  compression is to be done in a single step.  In order to complete in one
+  call, avail_out must be at least the value returned by deflateBound (see
+  below).  Then deflate is guaranteed to return Z_STREAM_END.  If not enough
+  output space is provided, deflate will not return Z_STREAM_END, and it must
+  be called again as described above.
+
+    deflate() sets strm->adler to the Adler-32 checksum of all input read
+  so far (that is, total_in bytes).  If a gzip stream is being generated, then
+  strm->adler will be the CRC-32 checksum of the input read so far.  (See
+  deflateInit2 below.)
+
+    deflate() may update strm->data_type if it can make a good guess about
+  the input data type (Z_BINARY or Z_TEXT).  If in doubt, the data is
+  considered binary.  This field is only for information purposes and does not
+  affect the compression algorithm in any manner.
+
+    deflate() returns Z_OK if some progress has been made (more input
+  processed or more output produced), Z_STREAM_END if all input has been
+  consumed and all output has been produced (only when flush is set to
+  Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
+  if next_in or next_out was Z_NULL or the state was inadvertently written over
+  by the application), or Z_BUF_ERROR if no progress is possible (for example
+  avail_in or avail_out was zero).  Note that Z_BUF_ERROR is not fatal, and
+  deflate() can be called again with more input and more output space to
+  continue compressing.
+*/
+
+
+ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
+/*
+     All dynamically allocated data structures for this stream are freed.
+   This function discards any unprocessed input and does not flush any pending
+   output.
+
+     deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
+   stream state was inconsistent, Z_DATA_ERROR if the stream was freed
+   prematurely (some input or output was discarded).  In the error case, msg
+   may be set but then points to a static string (which must not be
+   deallocated).
+*/
+
+
+/*
+ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
+
+     Initializes the internal stream state for decompression.  The fields
+   next_in, avail_in, zalloc, zfree and opaque must be initialized before by
+   the caller.  In the current version of inflate, the provided input is not
+   read or consumed.  The allocation of a sliding window will be deferred to
+   the first call of inflate (if the decompression does not complete on the
+   first call).  If zalloc and zfree are set to Z_NULL, inflateInit updates
+   them to use default allocation functions.
+
+     inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+   version assumed by the caller, or Z_STREAM_ERROR if the parameters are
+   invalid, such as a null pointer to the structure.  msg is set to null if
+   there is no error message.  inflateInit does not perform any decompression.
+   Actual decompression will be done by inflate().  So next_in, avail_in,
+   next_out, and avail_out are unused and unchanged.  The current
+   implementation of inflateInit() does not process any header information --
+   that is deferred until inflate() is called.
+*/
+
+
+ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
+/*
+    inflate decompresses as much data as possible, and stops when the input
+  buffer becomes empty or the output buffer becomes full.  It may introduce
+  some output latency (reading input without producing any output) except when
+  forced to flush.
+
+  The detailed semantics are as follows.  inflate performs one or both of the
+  following actions:
+
+  - Decompress more input starting at next_in and update next_in and avail_in
+    accordingly.  If not all input can be processed (because there is not
+    enough room in the output buffer), then next_in and avail_in are updated
+    accordingly, and processing will resume at this point for the next call of
+    inflate().
+
+  - Generate more output starting at next_out and update next_out and avail_out
+    accordingly.  inflate() provides as much output as possible, until there is
+    no more input data or no more space in the output buffer (see below about
+    the flush parameter).
+
+    Before the call of inflate(), the application should ensure that at least
+  one of the actions is possible, by providing more input and/or consuming more
+  output, and updating the next_* and avail_* values accordingly.  If the
+  caller of inflate() does not provide both available input and available
+  output space, it is possible that there will be no progress made.  The
+  application can consume the uncompressed output when it wants, for example
+  when the output buffer is full (avail_out == 0), or after each call of
+  inflate().  If inflate returns Z_OK and with zero avail_out, it must be
+  called again after making room in the output buffer because there might be
+  more output pending.
+
+    The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH,
+  Z_BLOCK, or Z_TREES.  Z_SYNC_FLUSH requests that inflate() flush as much
+  output as possible to the output buffer.  Z_BLOCK requests that inflate()
+  stop if and when it gets to the next deflate block boundary.  When decoding
+  the zlib or gzip format, this will cause inflate() to return immediately
+  after the header and before the first block.  When doing a raw inflate,
+  inflate() will go ahead and process the first block, and will return when it
+  gets to the end of that block, or when it runs out of data.
+
+    The Z_BLOCK option assists in appending to or combining deflate streams.
+  To assist in this, on return inflate() always sets strm->data_type to the
+  number of unused bits in the last byte taken from strm->next_in, plus 64 if
+  inflate() is currently decoding the last block in the deflate stream, plus
+  128 if inflate() returned immediately after decoding an end-of-block code or
+  decoding the complete header up to just before the first byte of the deflate
+  stream.  The end-of-block will not be indicated until all of the uncompressed
+  data from that block has been written to strm->next_out.  The number of
+  unused bits may in general be greater than seven, except when bit 7 of
+  data_type is set, in which case the number of unused bits will be less than
+  eight.  data_type is set as noted here every time inflate() returns for all
+  flush options, and so can be used to determine the amount of currently
+  consumed input in bits.
+
+    The Z_TREES option behaves as Z_BLOCK does, but it also returns when the
+  end of each deflate block header is reached, before any actual data in that
+  block is decoded.  This allows the caller to determine the length of the
+  deflate block header for later use in random access within a deflate block.
+  256 is added to the value of strm->data_type when inflate() returns
+  immediately after reaching the end of the deflate block header.
+
+    inflate() should normally be called until it returns Z_STREAM_END or an
+  error.  However if all decompression is to be performed in a single step (a
+  single call of inflate), the parameter flush should be set to Z_FINISH.  In
+  this case all pending input is processed and all pending output is flushed;
+  avail_out must be large enough to hold all of the uncompressed data for the
+  operation to complete.  (The size of the uncompressed data may have been
+  saved by the compressor for this purpose.)  The use of Z_FINISH is not
+  required to perform an inflation in one step.  However it may be used to
+  inform inflate that a faster approach can be used for the single inflate()
+  call.  Z_FINISH also informs inflate to not maintain a sliding window if the
+  stream completes, which reduces inflate's memory footprint.  If the stream
+  does not complete, either because not all of the stream is provided or not
+  enough output space is provided, then a sliding window will be allocated and
+  inflate() can be called again to continue the operation as if Z_NO_FLUSH had
+  been used.
+
+     In this implementation, inflate() always flushes as much output as
+  possible to the output buffer, and always uses the faster approach on the
+  first call.  So the effects of the flush parameter in this implementation are
+  on the return value of inflate() as noted below, when inflate() returns early
+  when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of
+  memory for a sliding window when Z_FINISH is used.
+
+     If a preset dictionary is needed after this call (see inflateSetDictionary
+  below), inflate sets strm->adler to the Adler-32 checksum of the dictionary
+  chosen by the compressor and returns Z_NEED_DICT; otherwise it sets
+  strm->adler to the Adler-32 checksum of all output produced so far (that is,
+  total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described
+  below.  At the end of the stream, inflate() checks that its computed Adler-32
+  checksum is equal to that saved by the compressor and returns Z_STREAM_END
+  only if the checksum is correct.
+
+    inflate() can decompress and check either zlib-wrapped or gzip-wrapped
+  deflate data.  The header type is detected automatically, if requested when
+  initializing with inflateInit2().  Any information contained in the gzip
+  header is not retained unless inflateGetHeader() is used.  When processing
+  gzip-wrapped deflate data, strm->adler is set to the CRC-32 of the output
+  produced so far.  The CRC-32 is checked against the gzip trailer, as is the
+  uncompressed length, modulo 2^32.
+
+    inflate() returns Z_OK if some progress has been made (more input processed
+  or more output produced), Z_STREAM_END if the end of the compressed data has
+  been reached and all uncompressed output has been produced, Z_NEED_DICT if a
+  preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
+  corrupted (input stream not conforming to the zlib format or incorrect check
+  value, in which case strm->msg points to a string with a more specific
+  error), Z_STREAM_ERROR if the stream structure was inconsistent (for example
+  next_in or next_out was Z_NULL, or the state was inadvertently written over
+  by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR
+  if no progress was possible or if there was not enough room in the output
+  buffer when Z_FINISH is used.  Note that Z_BUF_ERROR is not fatal, and
+  inflate() can be called again with more input and more output space to
+  continue decompressing.  If Z_DATA_ERROR is returned, the application may
+  then call inflateSync() to look for a good compression block if a partial
+  recovery of the data is to be attempted.
+*/
+
+
+ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
+/*
+     All dynamically allocated data structures for this stream are freed.
+   This function discards any unprocessed input and does not flush any pending
+   output.
+
+     inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state
+   was inconsistent.
+*/
+
+
+                        /* Advanced functions */
+
+/*
+    The following functions are needed only in some special applications.
+*/
+
+/*
+ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
+                                     int  level,
+                                     int  method,
+                                     int  windowBits,
+                                     int  memLevel,
+                                     int  strategy));
+
+     This is another version of deflateInit with more compression options.  The
+   fields next_in, zalloc, zfree and opaque must be initialized before by the
+   caller.
+
+     The method parameter is the compression method.  It must be Z_DEFLATED in
+   this version of the library.
+
+     The windowBits parameter is the base two logarithm of the window size
+   (the size of the history buffer).  It should be in the range 8..15 for this
+   version of the library.  Larger values of this parameter result in better
+   compression at the expense of memory usage.  The default value is 15 if
+   deflateInit is used instead.
+
+     For the current implementation of deflate(), a windowBits value of 8 (a
+   window size of 256 bytes) is not supported.  As a result, a request for 8
+   will result in 9 (a 512-byte window).  In that case, providing 8 to
+   inflateInit2() will result in an error when the zlib header with 9 is
+   checked against the initialization of inflate().  The remedy is to not use 8
+   with deflateInit2() with this initialization, or at least in that case use 9
+   with inflateInit2().
+
+     windowBits can also be -8..-15 for raw deflate.  In this case, -windowBits
+   determines the window size.  deflate() will then generate raw deflate data
+   with no zlib header or trailer, and will not compute a check value.
+
+     windowBits can also be greater than 15 for optional gzip encoding.  Add
+   16 to windowBits to write a simple gzip header and trailer around the
+   compressed data instead of a zlib wrapper.  The gzip header will have no
+   file name, no extra data, no comment, no modification time (set to zero), no
+   header crc, and the operating system will be set to the appropriate value,
+   if the operating system was determined at compile time.  If a gzip stream is
+   being written, strm->adler is a CRC-32 instead of an Adler-32.
+
+     For raw deflate or gzip encoding, a request for a 256-byte window is
+   rejected as invalid, since only the zlib header provides a means of
+   transmitting the window size to the decompressor.
+
+     The memLevel parameter specifies how much memory should be allocated
+   for the internal compression state.  memLevel=1 uses minimum memory but is
+   slow and reduces compression ratio; memLevel=9 uses maximum memory for
+   optimal speed.  The default value is 8.  See zconf.h for total memory usage
+   as a function of windowBits and memLevel.
+
+     The strategy parameter is used to tune the compression algorithm.  Use the
+   value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
+   filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no
+   string match), or Z_RLE to limit match distances to one (run-length
+   encoding).  Filtered data consists mostly of small values with a somewhat
+   random distribution.  In this case, the compression algorithm is tuned to
+   compress them better.  The effect of Z_FILTERED is to force more Huffman
+   coding and less string matching; it is somewhat intermediate between
+   Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY.  Z_RLE is designed to be almost as
+   fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data.  The
+   strategy parameter only affects the compression ratio but not the
+   correctness of the compressed output even if it is not set appropriately.
+   Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler
+   decoder for special applications.
+
+     deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid
+   method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is
+   incompatible with the version assumed by the caller (ZLIB_VERSION).  msg is
+   set to null if there is no error message.  deflateInit2 does not perform any
+   compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
+                                             const Bytef *dictionary,
+                                             uInt  dictLength));
+/*
+     Initializes the compression dictionary from the given byte sequence
+   without producing any compressed output.  When using the zlib format, this
+   function must be called immediately after deflateInit, deflateInit2 or
+   deflateReset, and before any call of deflate.  When doing raw deflate, this
+   function must be called either before any call of deflate, or immediately
+   after the completion of a deflate block, i.e. after all input has been
+   consumed and all output has been delivered when using any of the flush
+   options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH.  The
+   compressor and decompressor must use exactly the same dictionary (see
+   inflateSetDictionary).
+
+     The dictionary should consist of strings (byte sequences) that are likely
+   to be encountered later in the data to be compressed, with the most commonly
+   used strings preferably put towards the end of the dictionary.  Using a
+   dictionary is most useful when the data to be compressed is short and can be
+   predicted with good accuracy; the data can then be compressed better than
+   with the default empty dictionary.
+
+     Depending on the size of the compression data structures selected by
+   deflateInit or deflateInit2, a part of the dictionary may in effect be
+   discarded, for example if the dictionary is larger than the window size
+   provided in deflateInit or deflateInit2.  Thus the strings most likely to be
+   useful should be put at the end of the dictionary, not at the front.  In
+   addition, the current implementation of deflate will use at most the window
+   size minus 262 bytes of the provided dictionary.
+
+     Upon return of this function, strm->adler is set to the Adler-32 value
+   of the dictionary; the decompressor may later use this value to determine
+   which dictionary has been used by the compressor.  (The Adler-32 value
+   applies to the whole dictionary even if only a subset of the dictionary is
+   actually used by the compressor.) If a raw deflate was requested, then the
+   Adler-32 value is not computed and strm->adler is not set.
+
+     deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
+   parameter is invalid (e.g.  dictionary being Z_NULL) or the stream state is
+   inconsistent (for example if deflate has already been called for this stream
+   or if not at a block boundary for raw deflate).  deflateSetDictionary does
+   not perform any compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm,
+                                             Bytef *dictionary,
+                                             uInt  *dictLength));
+/*
+     Returns the sliding dictionary being maintained by deflate.  dictLength is
+   set to the number of bytes in the dictionary, and that many bytes are copied
+   to dictionary.  dictionary must have enough space, where 32768 bytes is
+   always enough.  If deflateGetDictionary() is called with dictionary equal to
+   Z_NULL, then only the dictionary length is returned, and nothing is copied.
+   Similarly, if dictLength is Z_NULL, then it is not set.
+
+     deflateGetDictionary() may return a length less than the window size, even
+   when more than the window size in input has been provided. It may return up
+   to 258 bytes less in that case, due to how zlib's implementation of deflate
+   manages the sliding window and lookahead for matches, where matches can be
+   up to 258 bytes long. If the application needs the last window-size bytes of
+   input, then that would need to be saved by the application outside of zlib.
+
+     deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the
+   stream state is inconsistent.
+*/
+
+ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
+                                    z_streamp source));
+/*
+     Sets the destination stream as a complete copy of the source stream.
+
+     This function can be useful when several compression strategies will be
+   tried, for example when there are several ways of pre-processing the input
+   data with a filter.  The streams that will be discarded should then be freed
+   by calling deflateEnd.  Note that deflateCopy duplicates the internal
+   compression state which can be quite large, so this strategy is slow and can
+   consume lots of memory.
+
+     deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+   (such as zalloc being Z_NULL).  msg is left unchanged in both source and
+   destination.
+*/
+
+ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
+/*
+     This function is equivalent to deflateEnd followed by deflateInit, but
+   does not free and reallocate the internal compression state.  The stream
+   will leave the compression level and any other attributes that may have been
+   set unchanged.
+
+     deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being Z_NULL).
+*/
+
+ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
+                                      int level,
+                                      int strategy));
+/*
+     Dynamically update the compression level and compression strategy.  The
+   interpretation of level and strategy is as in deflateInit2().  This can be
+   used to switch between compression and straight copy of the input data, or
+   to switch to a different kind of input data requiring a different strategy.
+   If the compression approach (which is a function of the level) or the
+   strategy is changed, and if any input has been consumed in a previous
+   deflate() call, then the input available so far is compressed with the old
+   level and strategy using deflate(strm, Z_BLOCK).  There are three approaches
+   for the compression levels 0, 1..3, and 4..9 respectively.  The new level
+   and strategy will take effect at the next call of deflate().
+
+     If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does
+   not have enough output space to complete, then the parameter change will not
+   take effect.  In this case, deflateParams() can be called again with the
+   same parameters and more output space to try again.
+
+     In order to assure a change in the parameters on the first try, the
+   deflate stream should be flushed using deflate() with Z_BLOCK or other flush
+   request until strm.avail_out is not zero, before calling deflateParams().
+   Then no more input data should be provided before the deflateParams() call.
+   If this is done, the old level and strategy will be applied to the data
+   compressed before deflateParams(), and the new level and strategy will be
+   applied to the data compressed after deflateParams().
+
+     deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream
+   state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if
+   there was not enough output space to complete the compression of the
+   available input data before a change in the strategy or approach.  Note that
+   in the case of a Z_BUF_ERROR, the parameters are not changed.  A return
+   value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be
+   retried with more output space.
+*/
+
+ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
+                                    int good_length,
+                                    int max_lazy,
+                                    int nice_length,
+                                    int max_chain));
+/*
+     Fine tune deflate's internal compression parameters.  This should only be
+   used by someone who understands the algorithm used by zlib's deflate for
+   searching for the best matching string, and even then only by the most
+   fanatic optimizer trying to squeeze out the last compressed bit for their
+   specific input data.  Read the deflate.c source code for the meaning of the
+   max_lazy, good_length, nice_length, and max_chain parameters.
+
+     deflateTune() can be called after deflateInit() or deflateInit2(), and
+   returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream.
+ */
+
+ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
+                                       uLong sourceLen));
+/*
+     deflateBound() returns an upper bound on the compressed size after
+   deflation of sourceLen bytes.  It must be called after deflateInit() or
+   deflateInit2(), and after deflateSetHeader(), if used.  This would be used
+   to allocate an output buffer for deflation in a single pass, and so would be
+   called before deflate().  If that first deflate() call is provided the
+   sourceLen input bytes, an output buffer allocated to the size returned by
+   deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed
+   to return Z_STREAM_END.  Note that it is possible for the compressed size to
+   be larger than the value returned by deflateBound() if flush options other
+   than Z_FINISH or Z_NO_FLUSH are used.
+*/
+
+ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm,
+                                       unsigned *pending,
+                                       int *bits));
+/*
+     deflatePending() returns the number of bytes and bits of output that have
+   been generated, but not yet provided in the available output.  The bytes not
+   provided would be due to the available output space having been consumed.
+   The number of bits of output not provided are between 0 and 7, where they
+   await more bits to join them in order to fill out a full byte.  If pending
+   or bits are Z_NULL, then those values are not set.
+
+     deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+ */
+
+ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
+                                     int bits,
+                                     int value));
+/*
+     deflatePrime() inserts bits in the deflate output stream.  The intent
+   is that this function is used to start off the deflate output with the bits
+   leftover from a previous deflate stream when appending to it.  As such, this
+   function can only be used for raw deflate, and must be used before the first
+   deflate() call after a deflateInit2() or deflateReset().  bits must be less
+   than or equal to 16, and that many of the least significant bits of value
+   will be inserted in the output.
+
+     deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough
+   room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the
+   source stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
+                                         gz_headerp head));
+/*
+     deflateSetHeader() provides gzip header information for when a gzip
+   stream is requested by deflateInit2().  deflateSetHeader() may be called
+   after deflateInit2() or deflateReset() and before the first call of
+   deflate().  The text, time, os, extra field, name, and comment information
+   in the provided gz_header structure are written to the gzip header (xflag is
+   ignored -- the extra flags are set according to the compression level).  The
+   caller must assure that, if not Z_NULL, name and comment are terminated with
+   a zero byte, and that if extra is not Z_NULL, that extra_len bytes are
+   available there.  If hcrc is true, a gzip header crc is included.  Note that
+   the current versions of the command-line version of gzip (up through version
+   1.3.x) do not support header crc's, and will report that it is a "multi-part
+   gzip file" and give up.
+
+     If deflateSetHeader is not used, the default gzip header has text false,
+   the time set to zero, and os set to 255, with no extra, name, or comment
+   fields.  The gzip header is returned to the default state by deflateReset().
+
+     deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
+                                     int  windowBits));
+
+     This is another version of inflateInit with an extra parameter.  The
+   fields next_in, avail_in, zalloc, zfree and opaque must be initialized
+   before by the caller.
+
+     The windowBits parameter is the base two logarithm of the maximum window
+   size (the size of the history buffer).  It should be in the range 8..15 for
+   this version of the library.  The default value is 15 if inflateInit is used
+   instead.  windowBits must be greater than or equal to the windowBits value
+   provided to deflateInit2() while compressing, or it must be equal to 15 if
+   deflateInit2() was not used.  If a compressed stream with a larger window
+   size is given as input, inflate() will return with the error code
+   Z_DATA_ERROR instead of trying to allocate a larger window.
+
+     windowBits can also be zero to request that inflate use the window size in
+   the zlib header of the compressed stream.
+
+     windowBits can also be -8..-15 for raw inflate.  In this case, -windowBits
+   determines the window size.  inflate() will then process raw deflate data,
+   not looking for a zlib or gzip header, not generating a check value, and not
+   looking for any check values for comparison at the end of the stream.  This
+   is for use with other formats that use the deflate compressed data format
+   such as zip.  Those formats provide their own check values.  If a custom
+   format is developed using the raw deflate format for compressed data, it is
+   recommended that a check value such as an Adler-32 or a CRC-32 be applied to
+   the uncompressed data as is done in the zlib, gzip, and zip formats.  For
+   most applications, the zlib format should be used as is.  Note that comments
+   above on the use in deflateInit2() apply to the magnitude of windowBits.
+
+     windowBits can also be greater than 15 for optional gzip decoding.  Add
+   32 to windowBits to enable zlib and gzip decoding with automatic header
+   detection, or add 16 to decode only the gzip format (the zlib format will
+   return a Z_DATA_ERROR).  If a gzip stream is being decoded, strm->adler is a
+   CRC-32 instead of an Adler-32.  Unlike the gunzip utility and gzread() (see
+   below), inflate() will not automatically decode concatenated gzip streams.
+   inflate() will return Z_STREAM_END at the end of the gzip stream.  The state
+   would need to be reset to continue decoding a subsequent gzip stream.
+
+     inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+   version assumed by the caller, or Z_STREAM_ERROR if the parameters are
+   invalid, such as a null pointer to the structure.  msg is set to null if
+   there is no error message.  inflateInit2 does not perform any decompression
+   apart from possibly reading the zlib header if present: actual decompression
+   will be done by inflate().  (So next_in and avail_in may be modified, but
+   next_out and avail_out are unused and unchanged.) The current implementation
+   of inflateInit2() does not process any header information -- that is
+   deferred until inflate() is called.
+*/
+
+ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
+                                             const Bytef *dictionary,
+                                             uInt  dictLength));
+/*
+     Initializes the decompression dictionary from the given uncompressed byte
+   sequence.  This function must be called immediately after a call of inflate,
+   if that call returned Z_NEED_DICT.  The dictionary chosen by the compressor
+   can be determined from the Adler-32 value returned by that call of inflate.
+   The compressor and decompressor must use exactly the same dictionary (see
+   deflateSetDictionary).  For raw inflate, this function can be called at any
+   time to set the dictionary.  If the provided dictionary is smaller than the
+   window and there is already data in the window, then the provided dictionary
+   will amend what's there.  The application must ensure that the dictionary
+   that was used for compression is provided.
+
+     inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
+   parameter is invalid (e.g.  dictionary being Z_NULL) or the stream state is
+   inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
+   expected one (incorrect Adler-32 value).  inflateSetDictionary does not
+   perform any decompression: this will be done by subsequent calls of
+   inflate().
+*/
+
+ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm,
+                                             Bytef *dictionary,
+                                             uInt  *dictLength));
+/*
+     Returns the sliding dictionary being maintained by inflate.  dictLength is
+   set to the number of bytes in the dictionary, and that many bytes are copied
+   to dictionary.  dictionary must have enough space, where 32768 bytes is
+   always enough.  If inflateGetDictionary() is called with dictionary equal to
+   Z_NULL, then only the dictionary length is returned, and nothing is copied.
+   Similarly, if dictLength is Z_NULL, then it is not set.
+
+     inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the
+   stream state is inconsistent.
+*/
+
+ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
+/*
+     Skips invalid compressed data until a possible full flush point (see above
+   for the description of deflate with Z_FULL_FLUSH) can be found, or until all
+   available input is skipped.  No output is provided.
+
+     inflateSync searches for a 00 00 FF FF pattern in the compressed data.
+   All full flush points have this pattern, but not all occurrences of this
+   pattern are full flush points.
+
+     inflateSync returns Z_OK if a possible full flush point has been found,
+   Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point
+   has been found, or Z_STREAM_ERROR if the stream structure was inconsistent.
+   In the success case, the application may save the current value of
+   total_in which indicates where valid compressed data was found.  In the
+   error case, the application may repeatedly call inflateSync, providing more
+   input each time, until success or end of the input data.
+*/
+
+ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
+                                    z_streamp source));
+/*
+     Sets the destination stream as a complete copy of the source stream.
+
+     This function can be useful when randomly accessing a large stream.  The
+   first pass through the stream can periodically record the inflate state,
+   allowing restarting inflate at those points when randomly accessing the
+   stream.
+
+     inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+   (such as zalloc being Z_NULL).  msg is left unchanged in both source and
+   destination.
+*/
+
+ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
+/*
+     This function is equivalent to inflateEnd followed by inflateInit,
+   but does not free and reallocate the internal decompression state.  The
+   stream will keep attributes that may have been set by inflateInit2.
+
+     inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being Z_NULL).
+*/
+
+ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm,
+                                      int windowBits));
+/*
+     This function is the same as inflateReset, but it also permits changing
+   the wrap and window size requests.  The windowBits parameter is interpreted
+   the same as it is for inflateInit2.  If the window size is changed, then the
+   memory allocated for the window is freed, and the window will be reallocated
+   by inflate() if needed.
+
+     inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being Z_NULL), or if
+   the windowBits parameter is invalid.
+*/
+
+ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
+                                     int bits,
+                                     int value));
+/*
+     This function inserts bits in the inflate input stream.  The intent is
+   that this function is used to start inflating at a bit position in the
+   middle of a byte.  The provided bits will be used before any bytes are used
+   from next_in.  This function should only be used with raw inflate, and
+   should be used before the first inflate() call after inflateInit2() or
+   inflateReset().  bits must be less than or equal to 16, and that many of the
+   least significant bits of value will be inserted in the input.
+
+     If bits is negative, then the input stream bit buffer is emptied.  Then
+   inflatePrime() can be called again to put bits in the buffer.  This is used
+   to clear out bits leftover after feeding inflate a block description prior
+   to feeding inflate codes.
+
+     inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm));
+/*
+     This function returns two values, one in the lower 16 bits of the return
+   value, and the other in the remaining upper bits, obtained by shifting the
+   return value down 16 bits.  If the upper value is -1 and the lower value is
+   zero, then inflate() is currently decoding information outside of a block.
+   If the upper value is -1 and the lower value is non-zero, then inflate is in
+   the middle of a stored block, with the lower value equaling the number of
+   bytes from the input remaining to copy.  If the upper value is not -1, then
+   it is the number of bits back from the current bit position in the input of
+   the code (literal or length/distance pair) currently being processed.  In
+   that case the lower value is the number of bytes already emitted for that
+   code.
+
+     A code is being processed if inflate is waiting for more input to complete
+   decoding of the code, or if it has completed decoding but is waiting for
+   more output space to write the literal or match data.
+
+     inflateMark() is used to mark locations in the input data for random
+   access, which may be at bit positions, and to note those cases where the
+   output of a code may span boundaries of random access blocks.  The current
+   location in the input stream can be determined from avail_in and data_type
+   as noted in the description for the Z_BLOCK flush parameter for inflate.
+
+     inflateMark returns the value noted above, or -65536 if the provided
+   source stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
+                                         gz_headerp head));
+/*
+     inflateGetHeader() requests that gzip header information be stored in the
+   provided gz_header structure.  inflateGetHeader() may be called after
+   inflateInit2() or inflateReset(), and before the first call of inflate().
+   As inflate() processes the gzip stream, head->done is zero until the header
+   is completed, at which time head->done is set to one.  If a zlib stream is
+   being decoded, then head->done is set to -1 to indicate that there will be
+   no gzip header information forthcoming.  Note that Z_BLOCK or Z_TREES can be
+   used to force inflate() to return immediately after header processing is
+   complete and before any actual data is decompressed.
+
+     The text, time, xflags, and os fields are filled in with the gzip header
+   contents.  hcrc is set to true if there is a header CRC.  (The header CRC
+   was valid if done is set to one.) If extra is not Z_NULL, then extra_max
+   contains the maximum number of bytes to write to extra.  Once done is true,
+   extra_len contains the actual extra field length, and extra contains the
+   extra field, or that field truncated if extra_max is less than extra_len.
+   If name is not Z_NULL, then up to name_max characters are written there,
+   terminated with a zero unless the length is greater than name_max.  If
+   comment is not Z_NULL, then up to comm_max characters are written there,
+   terminated with a zero unless the length is greater than comm_max.  When any
+   of extra, name, or comment are not Z_NULL and the respective field is not
+   present in the header, then that field is set to Z_NULL to signal its
+   absence.  This allows the use of deflateSetHeader() with the returned
+   structure to duplicate the header.  However if those fields are set to
+   allocated memory, then the application will need to save those pointers
+   elsewhere so that they can be eventually freed.
+
+     If inflateGetHeader is not used, then the header information is simply
+   discarded.  The header is always checked for validity, including the header
+   CRC if present.  inflateReset() will reset the process to discard the header
+   information.  The application would need to call inflateGetHeader() again to
+   retrieve the header from the next gzip stream.
+
+     inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
+                                        unsigned char FAR *window));
+
+     Initialize the internal stream state for decompression using inflateBack()
+   calls.  The fields zalloc, zfree and opaque in strm must be initialized
+   before the call.  If zalloc and zfree are Z_NULL, then the default library-
+   derived memory allocation routines are used.  windowBits is the base two
+   logarithm of the window size, in the range 8..15.  window is a caller
+   supplied buffer of that size.  Except for special applications where it is
+   assured that deflate was used with small window sizes, windowBits must be 15
+   and a 32K byte window must be supplied to be able to decompress general
+   deflate streams.
+
+     See inflateBack() for the usage of these routines.
+
+     inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of
+   the parameters are invalid, Z_MEM_ERROR if the internal state could not be
+   allocated, or Z_VERSION_ERROR if the version of the library does not match
+   the version of the header file.
+*/
+
+typedef unsigned (*in_func) OF((void FAR *,
+                                z_const unsigned char FAR * FAR *));
+typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned));
+
+ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
+                                    in_func in, void FAR *in_desc,
+                                    out_func out, void FAR *out_desc));
+/*
+     inflateBack() does a raw inflate with a single call using a call-back
+   interface for input and output.  This is potentially more efficient than
+   inflate() for file i/o applications, in that it avoids copying between the
+   output and the sliding window by simply making the window itself the output
+   buffer.  inflate() can be faster on modern CPUs when used with large
+   buffers.  inflateBack() trusts the application to not change the output
+   buffer passed by the output function, at least until inflateBack() returns.
+
+     inflateBackInit() must be called first to allocate the internal state
+   and to initialize the state with the user-provided window buffer.
+   inflateBack() may then be used multiple times to inflate a complete, raw
+   deflate stream with each call.  inflateBackEnd() is then called to free the
+   allocated state.
+
+     A raw deflate stream is one with no zlib or gzip header or trailer.
+   This routine would normally be used in a utility that reads zip or gzip
+   files and writes out uncompressed files.  The utility would decode the
+   header and process the trailer on its own, hence this routine expects only
+   the raw deflate stream to decompress.  This is different from the default
+   behavior of inflate(), which expects a zlib header and trailer around the
+   deflate stream.
+
+     inflateBack() uses two subroutines supplied by the caller that are then
+   called by inflateBack() for input and output.  inflateBack() calls those
+   routines until it reads a complete deflate stream and writes out all of the
+   uncompressed data, or until it encounters an error.  The functions'
+   parameters and return types are defined above in the in_func and out_func
+   typedefs.  inflateBack() will call in(in_desc, &buf) which should return the
+   number of bytes of provided input, and a pointer to that input in buf.  If
+   there is no input available, in() must return zero -- buf is ignored in that
+   case -- and inflateBack() will return a buffer error.  inflateBack() will
+   call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1].
+   out() should return zero on success, or non-zero on failure.  If out()
+   returns non-zero, inflateBack() will return with an error.  Neither in() nor
+   out() are permitted to change the contents of the window provided to
+   inflateBackInit(), which is also the buffer that out() uses to write from.
+   The length written by out() will be at most the window size.  Any non-zero
+   amount of input may be provided by in().
+
+     For convenience, inflateBack() can be provided input on the first call by
+   setting strm->next_in and strm->avail_in.  If that input is exhausted, then
+   in() will be called.  Therefore strm->next_in must be initialized before
+   calling inflateBack().  If strm->next_in is Z_NULL, then in() will be called
+   immediately for input.  If strm->next_in is not Z_NULL, then strm->avail_in
+   must also be initialized, and then if strm->avail_in is not zero, input will
+   initially be taken from strm->next_in[0 ..  strm->avail_in - 1].
+
+     The in_desc and out_desc parameters of inflateBack() are passed as the
+   first parameter of in() and out() respectively when they are called.  These
+   descriptors can be optionally used to pass any information that the caller-
+   supplied in() and out() functions need to do their job.
+
+     On return, inflateBack() will set strm->next_in and strm->avail_in to
+   pass back any unused input that was provided by the last in() call.  The
+   return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR
+   if in() or out() returned an error, Z_DATA_ERROR if there was a format error
+   in the deflate stream (in which case strm->msg is set to indicate the nature
+   of the error), or Z_STREAM_ERROR if the stream was not properly initialized.
+   In the case of Z_BUF_ERROR, an input or output error can be distinguished
+   using strm->next_in which will be Z_NULL only if in() returned an error.  If
+   strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning
+   non-zero.  (in() will always be called before out(), so strm->next_in is
+   assured to be defined if out() returns non-zero.)  Note that inflateBack()
+   cannot return Z_OK.
+*/
+
+ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
+/*
+     All memory allocated by inflateBackInit() is freed.
+
+     inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream
+   state was inconsistent.
+*/
+
+ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
+/* Return flags indicating compile-time options.
+
+    Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other:
+     1.0: size of uInt
+     3.2: size of uLong
+     5.4: size of voidpf (pointer)
+     7.6: size of z_off_t
+
+    Compiler, assembler, and debug options:
+     8: ZLIB_DEBUG
+     9: ASMV or ASMINF -- use ASM code
+     10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention
+     11: 0 (reserved)
+
+    One-time table building (smaller code, but not thread-safe if true):
+     12: BUILDFIXED -- build static block decoding tables when needed
+     13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed
+     14,15: 0 (reserved)
+
+    Library content (indicates missing functionality):
+     16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking
+                          deflate code when not needed)
+     17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect
+                    and decode gzip streams (to avoid linking crc code)
+     18-19: 0 (reserved)
+
+    Operation variations (changes in library functionality):
+     20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate
+     21: FASTEST -- deflate algorithm with only one, lowest compression level
+     22,23: 0 (reserved)
+
+    The sprintf variant used by gzprintf (zero is best):
+     24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format
+     25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure!
+     26: 0 = returns value, 1 = void -- 1 means inferred string length returned
+
+    Remainder:
+     27-31: 0 (reserved)
+ */
+
+#ifndef Z_SOLO
+
+                        /* utility functions */
+
+/*
+     The following utility functions are implemented on top of the basic
+   stream-oriented functions.  To simplify the interface, some default options
+   are assumed (compression level and memory usage, standard memory allocation
+   functions).  The source code of these utility functions can be modified if
+   you need special options.
+*/
+
+ZEXTERN int ZEXPORT compress OF((Bytef *dest,   uLongf *destLen,
+                                 const Bytef *source, uLong sourceLen));
+/*
+     Compresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer.  Upon entry, destLen is the total size
+   of the destination buffer, which must be at least the value returned by
+   compressBound(sourceLen).  Upon exit, destLen is the actual size of the
+   compressed data.  compress() is equivalent to compress2() with a level
+   parameter of Z_DEFAULT_COMPRESSION.
+
+     compress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer.
+*/
+
+ZEXTERN int ZEXPORT compress2 OF((Bytef *dest,   uLongf *destLen,
+                                  const Bytef *source, uLong sourceLen,
+                                  int level));
+/*
+     Compresses the source buffer into the destination buffer.  The level
+   parameter has the same meaning as in deflateInit.  sourceLen is the byte
+   length of the source buffer.  Upon entry, destLen is the total size of the
+   destination buffer, which must be at least the value returned by
+   compressBound(sourceLen).  Upon exit, destLen is the actual size of the
+   compressed data.
+
+     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer,
+   Z_STREAM_ERROR if the level parameter is invalid.
+*/
+
+ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen));
+/*
+     compressBound() returns an upper bound on the compressed size after
+   compress() or compress2() on sourceLen bytes.  It would be used before a
+   compress() or compress2() call to allocate the destination buffer.
+*/
+
+ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
+                                   const Bytef *source, uLong sourceLen));
+/*
+     Decompresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer.  Upon entry, destLen is the total size
+   of the destination buffer, which must be large enough to hold the entire
+   uncompressed data.  (The size of the uncompressed data must have been saved
+   previously by the compressor and transmitted to the decompressor by some
+   mechanism outside the scope of this compression library.) Upon exit, destLen
+   is the actual size of the uncompressed data.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete.  In
+   the case where there is not enough room, uncompress() will fill the output
+   buffer with the uncompressed data up to that point.
+*/
+
+ZEXTERN int ZEXPORT uncompress2 OF((Bytef *dest,   uLongf *destLen,
+                                    const Bytef *source, uLong *sourceLen));
+/*
+     Same as uncompress, except that sourceLen is a pointer, where the
+   length of the source is *sourceLen.  On return, *sourceLen is the number of
+   source bytes consumed.
+*/
+
+                        /* gzip file access functions */
+
+/*
+     This library supports reading and writing files in gzip (.gz) format with
+   an interface similar to that of stdio, using the functions that start with
+   "gz".  The gzip format is different from the zlib format.  The gzip format
+   is a wrapper, documented in RFC 1952, around a deflate stream.
+*/
+
+typedef struct gzFile_s *gzFile;    /* semi-opaque gzip file descriptor */
+
+/*
+ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
+
+     Opens a gzip (.gz) file for reading or writing.  The mode parameter is as
+   in fopen ("rb" or "wb") but can also include a compression level ("wb9") or
+   a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only
+   compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F'
+   for fixed code compression as in "wb9F".  (See the description of
+   deflateInit2 for more information about the strategy parameter.)  'T' will
+   request transparent writing or appending with no compression and not using
+   the gzip format.
+
+     "a" can be used instead of "w" to request that the gzip stream that will
+   be written be appended to the file.  "+" will result in an error, since
+   reading and writing to the same gzip file is not supported.  The addition of
+   "x" when writing will create the file exclusively, which fails if the file
+   already exists.  On systems that support it, the addition of "e" when
+   reading or writing will set the flag to close the file on an execve() call.
+
+     These functions, as well as gzip, will read and decode a sequence of gzip
+   streams in a file.  The append function of gzopen() can be used to create
+   such a file.  (Also see gzflush() for another way to do this.)  When
+   appending, gzopen does not test whether the file begins with a gzip stream,
+   nor does it look for the end of the gzip streams to begin appending.  gzopen
+   will simply append a gzip stream to the existing file.
+
+     gzopen can be used to read a file which is not in gzip format; in this
+   case gzread will directly read from the file without decompression.  When
+   reading, this will be detected automatically by looking for the magic two-
+   byte gzip header.
+
+     gzopen returns NULL if the file could not be opened, if there was
+   insufficient memory to allocate the gzFile state, or if an invalid mode was
+   specified (an 'r', 'w', or 'a' was not provided, or '+' was provided).
+   errno can be checked to determine if the reason gzopen failed was that the
+   file could not be opened.
+*/
+
+ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
+/*
+     gzdopen associates a gzFile with the file descriptor fd.  File descriptors
+   are obtained from calls like open, dup, creat, pipe or fileno (if the file
+   has been previously opened with fopen).  The mode parameter is as in gzopen.
+
+     The next call of gzclose on the returned gzFile will also close the file
+   descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor
+   fd.  If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd,
+   mode);.  The duplicated descriptor should be saved to avoid a leak, since
+   gzdopen does not close fd if it fails.  If you are using fileno() to get the
+   file descriptor from a FILE *, then you will have to use dup() to avoid
+   double-close()ing the file descriptor.  Both gzclose() and fclose() will
+   close the associated file descriptor, so they need to have different file
+   descriptors.
+
+     gzdopen returns NULL if there was insufficient memory to allocate the
+   gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not
+   provided, or '+' was provided), or if fd is -1.  The file descriptor is not
+   used until the next gz* read, write, seek, or close operation, so gzdopen
+   will not detect if fd is invalid (unless fd is -1).
+*/
+
+ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
+/*
+     Set the internal buffer size used by this library's functions.  The
+   default buffer size is 8192 bytes.  This function must be called after
+   gzopen() or gzdopen(), and before any other calls that read or write the
+   file.  The buffer memory allocation is always deferred to the first read or
+   write.  Three times that size in buffer space is allocated.  A larger buffer
+   size of, for example, 64K or 128K bytes will noticeably increase the speed
+   of decompression (reading).
+
+     The new buffer size also affects the maximum length for gzprintf().
+
+     gzbuffer() returns 0 on success, or -1 on failure, such as being called
+   too late.
+*/
+
+ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
+/*
+     Dynamically update the compression level or strategy.  See the description
+   of deflateInit2 for the meaning of these parameters.  Previously provided
+   data is flushed before the parameter change.
+
+     gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not
+   opened for writing, Z_ERRNO if there is an error writing the flushed data,
+   or Z_MEM_ERROR if there is a memory allocation error.
+*/
+
+ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
+/*
+     Reads the given number of uncompressed bytes from the compressed file.  If
+   the input file is not in gzip format, gzread copies the given number of
+   bytes into the buffer directly from the file.
+
+     After reaching the end of a gzip stream in the input, gzread will continue
+   to read, looking for another gzip stream.  Any number of gzip streams may be
+   concatenated in the input file, and will all be decompressed by gzread().
+   If something other than a gzip stream is encountered after a gzip stream,
+   that remaining trailing garbage is ignored (and no error is returned).
+
+     gzread can be used to read a gzip file that is being concurrently written.
+   Upon reaching the end of the input, gzread will return with the available
+   data.  If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then
+   gzclearerr can be used to clear the end of file indicator in order to permit
+   gzread to be tried again.  Z_OK indicates that a gzip stream was completed
+   on the last gzread.  Z_BUF_ERROR indicates that the input file ended in the
+   middle of a gzip stream.  Note that gzread does not return -1 in the event
+   of an incomplete gzip stream.  This error is deferred until gzclose(), which
+   will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip
+   stream.  Alternatively, gzerror can be used before gzclose to detect this
+   case.
+
+     gzread returns the number of uncompressed bytes actually read, less than
+   len for end of file, or -1 for error.  If len is too large to fit in an int,
+   then nothing is read, -1 is returned, and the error state is set to
+   Z_STREAM_ERROR.
+*/
+
+ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems,
+                                     gzFile file));
+/*
+     Read up to nitems items of size size from file to buf, otherwise operating
+   as gzread() does.  This duplicates the interface of stdio's fread(), with
+   size_t request and return types.  If the library defines size_t, then
+   z_size_t is identical to size_t.  If not, then z_size_t is an unsigned
+   integer type that can contain a pointer.
+
+     gzfread() returns the number of full items read of size size, or zero if
+   the end of the file was reached and a full item could not be read, or if
+   there was an error.  gzerror() must be consulted if zero is returned in
+   order to determine if there was an error.  If the multiplication of size and
+   nitems overflows, i.e. the product does not fit in a z_size_t, then nothing
+   is read, zero is returned, and the error state is set to Z_STREAM_ERROR.
+
+     In the event that the end of file is reached and only a partial item is
+   available at the end, i.e. the remaining uncompressed data length is not a
+   multiple of size, then the final partial item is nevertheless read into buf
+   and the end-of-file flag is set.  The length of the partial item read is not
+   provided, but could be inferred from the result of gztell().  This behavior
+   is the same as the behavior of fread() implementations in common libraries,
+   but it prevents the direct use of gzfread() to read a concurrently written
+   file, resetting and retrying on end-of-file, when size is not 1.
+*/
+
+ZEXTERN int ZEXPORT gzwrite OF((gzFile file,
+                                voidpc buf, unsigned len));
+/*
+     Writes the given number of uncompressed bytes into the compressed file.
+   gzwrite returns the number of uncompressed bytes written or 0 in case of
+   error.
+*/
+
+ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size,
+                                      z_size_t nitems, gzFile file));
+/*
+     gzfwrite() writes nitems items of size size from buf to file, duplicating
+   the interface of stdio's fwrite(), with size_t request and return types.  If
+   the library defines size_t, then z_size_t is identical to size_t.  If not,
+   then z_size_t is an unsigned integer type that can contain a pointer.
+
+     gzfwrite() returns the number of full items written of size size, or zero
+   if there was an error.  If the multiplication of size and nitems overflows,
+   i.e. the product does not fit in a z_size_t, then nothing is written, zero
+   is returned, and the error state is set to Z_STREAM_ERROR.
+*/
+
+ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...));
+/*
+     Converts, formats, and writes the arguments to the compressed file under
+   control of the format string, as in fprintf.  gzprintf returns the number of
+   uncompressed bytes actually written, or a negative zlib error code in case
+   of error.  The number of uncompressed bytes written is limited to 8191, or
+   one less than the buffer size given to gzbuffer().  The caller should assure
+   that this limit is not exceeded.  If it is exceeded, then gzprintf() will
+   return an error (0) with nothing written.  In this case, there may also be a
+   buffer overflow with unpredictable consequences, which is possible only if
+   zlib was compiled with the insecure functions sprintf() or vsprintf()
+   because the secure snprintf() or vsnprintf() functions were not available.
+   This can be determined using zlibCompileFlags().
+*/
+
+ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
+/*
+     Writes the given null-terminated string to the compressed file, excluding
+   the terminating null character.
+
+     gzputs returns the number of characters written, or -1 in case of error.
+*/
+
+ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
+/*
+     Reads bytes from the compressed file until len-1 characters are read, or a
+   newline character is read and transferred to buf, or an end-of-file
+   condition is encountered.  If any characters are read or if len == 1, the
+   string is terminated with a null character.  If no characters are read due
+   to an end-of-file or len < 1, then the buffer is left untouched.
+
+     gzgets returns buf which is a null-terminated string, or it returns NULL
+   for end-of-file or in case of error.  If there was an error, the contents at
+   buf are indeterminate.
+*/
+
+ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c));
+/*
+     Writes c, converted to an unsigned char, into the compressed file.  gzputc
+   returns the value that was written, or -1 in case of error.
+*/
+
+ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
+/*
+     Reads one byte from the compressed file.  gzgetc returns this byte or -1
+   in case of end of file or error.  This is implemented as a macro for speed.
+   As such, it does not do all of the checking the other functions do.  I.e.
+   it does not check to see if file is NULL, nor whether the structure file
+   points to has been clobbered or not.
+*/
+
+ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file));
+/*
+     Push one character back onto the stream to be read as the first character
+   on the next read.  At least one character of push-back is allowed.
+   gzungetc() returns the character pushed, or -1 on failure.  gzungetc() will
+   fail if c is -1, and may fail if a character has been pushed but not read
+   yet.  If gzungetc is used immediately after gzopen or gzdopen, at least the
+   output buffer size of pushed characters is allowed.  (See gzbuffer above.)
+   The pushed character will be discarded if the stream is repositioned with
+   gzseek() or gzrewind().
+*/
+
+ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
+/*
+     Flushes all pending output into the compressed file.  The parameter flush
+   is as in the deflate() function.  The return value is the zlib error number
+   (see function gzerror below).  gzflush is only permitted when writing.
+
+     If the flush parameter is Z_FINISH, the remaining data is written and the
+   gzip stream is completed in the output.  If gzwrite() is called again, a new
+   gzip stream will be started in the output.  gzread() is able to read such
+   concatenated gzip streams.
+
+     gzflush should be called only when strictly necessary because it will
+   degrade compression if called too often.
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
+                                   z_off_t offset, int whence));
+
+     Sets the starting position for the next gzread or gzwrite on the given
+   compressed file.  The offset represents a number of bytes in the
+   uncompressed data stream.  The whence parameter is defined as in lseek(2);
+   the value SEEK_END is not supported.
+
+     If the file is opened for reading, this function is emulated but can be
+   extremely slow.  If the file is opened for writing, only forward seeks are
+   supported; gzseek then compresses a sequence of zeroes up to the new
+   starting position.
+
+     gzseek returns the resulting offset location as measured in bytes from
+   the beginning of the uncompressed stream, or -1 in case of error, in
+   particular if the file is opened for writing and the new starting position
+   would be before the current position.
+*/
+
+ZEXTERN int ZEXPORT    gzrewind OF((gzFile file));
+/*
+     Rewinds the given file. This function is supported only for reading.
+
+     gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT    gztell OF((gzFile file));
+
+     Returns the starting position for the next gzread or gzwrite on the given
+   compressed file.  This position represents a number of bytes in the
+   uncompressed data stream, and is zero when starting, even if appending or
+   reading a gzip stream from the middle of a file using gzdopen().
+
+     gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file));
+
+     Returns the current offset in the file being read or written.  This offset
+   includes the count of bytes that precede the gzip stream, for example when
+   appending or when using gzdopen() for reading.  When reading, the offset
+   does not include as yet unused buffered input.  This information can be used
+   for a progress indicator.  On error, gzoffset() returns -1.
+*/
+
+ZEXTERN int ZEXPORT gzeof OF((gzFile file));
+/*
+     Returns true (1) if the end-of-file indicator has been set while reading,
+   false (0) otherwise.  Note that the end-of-file indicator is set only if the
+   read tried to go past the end of the input, but came up short.  Therefore,
+   just like feof(), gzeof() may return false even if there is no more data to
+   read, in the event that the last read request was for the exact number of
+   bytes remaining in the input file.  This will happen if the input file size
+   is an exact multiple of the buffer size.
+
+     If gzeof() returns true, then the read functions will return no more data,
+   unless the end-of-file indicator is reset by gzclearerr() and the input file
+   has grown since the previous end of file was detected.
+*/
+
+ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
+/*
+     Returns true (1) if file is being copied directly while reading, or false
+   (0) if file is a gzip stream being decompressed.
+
+     If the input file is empty, gzdirect() will return true, since the input
+   does not contain a gzip stream.
+
+     If gzdirect() is used immediately after gzopen() or gzdopen() it will
+   cause buffers to be allocated to allow reading the file to determine if it
+   is a gzip file.  Therefore if gzbuffer() is used, it should be called before
+   gzdirect().
+
+     When writing, gzdirect() returns true (1) if transparent writing was
+   requested ("wT" for the gzopen() mode), or false (0) otherwise.  (Note:
+   gzdirect() is not needed when writing.  Transparent writing must be
+   explicitly requested, so the application already knows the answer.  When
+   linking statically, using gzdirect() will include all of the zlib code for
+   gzip file reading and decompression, which may not be desired.)
+*/
+
+ZEXTERN int ZEXPORT    gzclose OF((gzFile file));
+/*
+     Flushes all pending output if necessary, closes the compressed file and
+   deallocates the (de)compression state.  Note that once file is closed, you
+   cannot call gzerror with file, since its structures have been deallocated.
+   gzclose must not be called more than once on the same file, just as free
+   must not be called more than once on the same allocation.
+
+     gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a
+   file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the
+   last read ended in the middle of a gzip stream, or Z_OK on success.
+*/
+
+ZEXTERN int ZEXPORT gzclose_r OF((gzFile file));
+ZEXTERN int ZEXPORT gzclose_w OF((gzFile file));
+/*
+     Same as gzclose(), but gzclose_r() is only for use when reading, and
+   gzclose_w() is only for use when writing or appending.  The advantage to
+   using these instead of gzclose() is that they avoid linking in zlib
+   compression or decompression code that is not used when only reading or only
+   writing respectively.  If gzclose() is used, then both compression and
+   decompression code will be included in the application when linking to a
+   static zlib library.
+*/
+
+ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
+/*
+     Returns the error message for the last error which occurred on the given
+   compressed file.  errnum is set to zlib error number.  If an error occurred
+   in the file system and not in the compression library, errnum is set to
+   Z_ERRNO and the application may consult errno to get the exact error code.
+
+     The application must not modify the returned string.  Future calls to
+   this function may invalidate the previously returned string.  If file is
+   closed, then the string previously returned by gzerror will no longer be
+   available.
+
+     gzerror() should be used to distinguish errors from end-of-file for those
+   functions above that do not distinguish those cases in their return values.
+*/
+
+ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
+/*
+     Clears the error and end-of-file flags for file.  This is analogous to the
+   clearerr() function in stdio.  This is useful for continuing to read a gzip
+   file that is being written concurrently.
+*/
+
+#endif /* !Z_SOLO */
+
+                        /* checksum functions */
+
+/*
+     These functions are not related to compression but are exported
+   anyway because they might be useful in applications using the compression
+   library.
+*/
+
+ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
+/*
+     Update a running Adler-32 checksum with the bytes buf[0..len-1] and
+   return the updated checksum.  If buf is Z_NULL, this function returns the
+   required initial value for the checksum.
+
+     An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed
+   much faster.
+
+   Usage example:
+
+     uLong adler = adler32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       adler = adler32(adler, buffer, length);
+     }
+     if (adler != original_adler) error();
+*/
+
+ZEXTERN uLong ZEXPORT adler32_z OF((uLong adler, const Bytef *buf,
+                                    z_size_t len));
+/*
+     Same as adler32(), but with a size_t length.
+*/
+
+/*
+ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
+                                          z_off_t len2));
+
+     Combine two Adler-32 checksums into one.  For two sequences of bytes, seq1
+   and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for
+   each, adler1 and adler2.  adler32_combine() returns the Adler-32 checksum of
+   seq1 and seq2 concatenated, requiring only adler1, adler2, and len2.  Note
+   that the z_off_t type (like off_t) is a signed integer.  If len2 is
+   negative, the result has no meaning or utility.
+*/
+
+ZEXTERN uLong ZEXPORT crc32   OF((uLong crc, const Bytef *buf, uInt len));
+/*
+     Update a running CRC-32 with the bytes buf[0..len-1] and return the
+   updated CRC-32.  If buf is Z_NULL, this function returns the required
+   initial value for the crc.  Pre- and post-conditioning (one's complement) is
+   performed within this function so it shouldn't be done by the application.
+
+   Usage example:
+
+     uLong crc = crc32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       crc = crc32(crc, buffer, length);
+     }
+     if (crc != original_crc) error();
+*/
+
+ZEXTERN uLong ZEXPORT crc32_z OF((uLong crc, const Bytef *buf,
+                                  z_size_t len));
+/*
+     Same as crc32(), but with a size_t length.
+*/
+
+/*
+ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
+
+     Combine two CRC-32 check values into one.  For two sequences of bytes,
+   seq1 and seq2 with lengths len1 and len2, CRC-32 check values were
+   calculated for each, crc1 and crc2.  crc32_combine() returns the CRC-32
+   check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and
+   len2.
+*/
+
+
+                        /* various hacks, don't look :) */
+
+/* deflateInit and inflateInit are macros to allow checking the zlib version
+ * and the compiler's view of z_stream:
+ */
+ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
+                                     const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
+                                     const char *version, int stream_size));
+ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int  level, int  method,
+                                      int windowBits, int memLevel,
+                                      int strategy, const char *version,
+                                      int stream_size));
+ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int  windowBits,
+                                      const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits,
+                                         unsigned char FAR *window,
+                                         const char *version,
+                                         int stream_size));
+#ifdef Z_PREFIX_SET
+#  define z_deflateInit(strm, level) \
+          deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_inflateInit(strm) \
+          inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+          deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+                        (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_inflateInit2(strm, windowBits) \
+          inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
+                        (int)sizeof(z_stream))
+#  define z_inflateBackInit(strm, windowBits, window) \
+          inflateBackInit_((strm), (windowBits), (window), \
+                           ZLIB_VERSION, (int)sizeof(z_stream))
+#else
+#  define deflateInit(strm, level) \
+          deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define inflateInit(strm) \
+          inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+          deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+                        (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define inflateInit2(strm, windowBits) \
+          inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
+                        (int)sizeof(z_stream))
+#  define inflateBackInit(strm, windowBits, window) \
+          inflateBackInit_((strm), (windowBits), (window), \
+                           ZLIB_VERSION, (int)sizeof(z_stream))
+#endif
+
+#ifndef Z_SOLO
+
+/* gzgetc() macro and its supporting function and exposed data structure.  Note
+ * that the real internal state is much larger than the exposed structure.
+ * This abbreviated structure exposes just enough for the gzgetc() macro.  The
+ * user should not mess with these exposed elements, since their names or
+ * behavior could change in the future, perhaps even capriciously.  They can
+ * only be used by the gzgetc() macro.  You have been warned.
+ */
+struct gzFile_s {
+    unsigned have;
+    unsigned char *next;
+    z_off64_t pos;
+};
+ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));  /* backward compatibility */
+#ifdef Z_PREFIX_SET
+#  undef z_gzgetc
+#  define z_gzgetc(g) \
+          ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g))
+#else
+#  define gzgetc(g) \
+          ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g))
+#endif
+
+/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or
+ * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if
+ * both are true, the application gets the *64 functions, and the regular
+ * functions are changed to 64 bits) -- in case these are set on systems
+ * without large file support, _LFS64_LARGEFILE must also be true
+ */
+#ifdef Z_LARGE64
+   ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+   ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
+   ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
+   ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
+   ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t));
+   ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t));
+#endif
+
+#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64)
+#  ifdef Z_PREFIX_SET
+#    define z_gzopen z_gzopen64
+#    define z_gzseek z_gzseek64
+#    define z_gztell z_gztell64
+#    define z_gzoffset z_gzoffset64
+#    define z_adler32_combine z_adler32_combine64
+#    define z_crc32_combine z_crc32_combine64
+#  else
+#    define gzopen gzopen64
+#    define gzseek gzseek64
+#    define gztell gztell64
+#    define gzoffset gzoffset64
+#    define adler32_combine adler32_combine64
+#    define crc32_combine crc32_combine64
+#  endif
+#  ifndef Z_LARGE64
+     ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+     ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int));
+     ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile));
+     ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile));
+     ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
+     ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
+#  endif
+#else
+   ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *));
+   ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int));
+   ZEXTERN z_off_t ZEXPORT gztell OF((gzFile));
+   ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile));
+   ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
+   ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
+#endif
+
+#else /* Z_SOLO */
+
+   ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
+   ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
+
+#endif /* !Z_SOLO */
+
+/* undocumented functions */
+ZEXTERN const char   * ZEXPORT zError           OF((int));
+ZEXTERN int            ZEXPORT inflateSyncPoint OF((z_streamp));
+ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table    OF((void));
+ZEXTERN int            ZEXPORT inflateUndermine OF((z_streamp, int));
+ZEXTERN int            ZEXPORT inflateValidate OF((z_streamp, int));
+ZEXTERN unsigned long  ZEXPORT inflateCodesUsed OF ((z_streamp));
+ZEXTERN int            ZEXPORT inflateResetKeep OF((z_streamp));
+ZEXTERN int            ZEXPORT deflateResetKeep OF((z_streamp));
+#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(Z_SOLO)
+ZEXTERN gzFile         ZEXPORT gzopen_w OF((const wchar_t *path,
+                                            const char *mode));
+#endif
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#  ifndef Z_SOLO
+ZEXTERN int            ZEXPORTVA gzvprintf Z_ARG((gzFile file,
+                                                  const char *format,
+                                                  va_list va));
+#  endif
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ZLIB_H */
diff --git a/libraries/zlib/zutil.c b/libraries/zlib/zutil.c
new file mode 100644
index 000000000..a76c6b0c7
--- /dev/null
+++ b/libraries/zlib/zutil.c
@@ -0,0 +1,325 @@
+/* zutil.c -- target dependent utility functions for the compression library
+ * Copyright (C) 1995-2017 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#include "zutil.h"
+#ifndef Z_SOLO
+#  include "gzguts.h"
+#endif
+
+z_const char * const z_errmsg[10] = {
+    (z_const char *)"need dictionary",     /* Z_NEED_DICT       2  */
+    (z_const char *)"stream end",          /* Z_STREAM_END      1  */
+    (z_const char *)"",                    /* Z_OK              0  */
+    (z_const char *)"file error",          /* Z_ERRNO         (-1) */
+    (z_const char *)"stream error",        /* Z_STREAM_ERROR  (-2) */
+    (z_const char *)"data error",          /* Z_DATA_ERROR    (-3) */
+    (z_const char *)"insufficient memory", /* Z_MEM_ERROR     (-4) */
+    (z_const char *)"buffer error",        /* Z_BUF_ERROR     (-5) */
+    (z_const char *)"incompatible version",/* Z_VERSION_ERROR (-6) */
+    (z_const char *)""
+};
+
+/* Return the compile-time ZLIB_VERSION string. */
+const char * ZEXPORT zlibVersion()
+{
+    return ZLIB_VERSION;
+}
+/* Build a bit mask describing type sizes and compile-time options. */
+uLong ZEXPORT zlibCompileFlags()
+{
+    uLong flags;
+
+    flags = 0;
+    switch ((int)(sizeof(uInt))) {      /* bits 0..1: size code of uInt */
+    case 2:     break;                  /* 2 bytes -> code 0 */
+    case 4:     flags += 1;     break;  /* 4 bytes -> code 1 */
+    case 8:     flags += 2;     break;  /* 8 bytes -> code 2 */
+    default:    flags += 3;             /* any other size -> code 3 */
+    }
+    switch ((int)(sizeof(uLong))) {     /* bits 2..3: size code of uLong */
+    case 2:     break;
+    case 4:     flags += 1 << 2;        break;
+    case 8:     flags += 2 << 2;        break;
+    default:    flags += 3 << 2;
+    }
+    switch ((int)(sizeof(voidpf))) {    /* bits 4..5: size code of voidpf */
+    case 2:     break;
+    case 4:     flags += 1 << 4;        break;
+    case 8:     flags += 2 << 4;        break;
+    default:    flags += 3 << 4;
+    }
+    switch ((int)(sizeof(z_off_t))) {   /* bits 6..7: size code of z_off_t */
+    case 2:     break;
+    case 4:     flags += 1 << 6;        break;
+    case 8:     flags += 2 << 6;        break;
+    default:    flags += 3 << 6;
+    }
+#ifdef ZLIB_DEBUG
+    flags += 1 << 8;
+#endif
+#if defined(ASMV) || defined(ASMINF)
+    flags += 1 << 9;
+#endif
+#ifdef ZLIB_WINAPI
+    flags += 1 << 10;
+#endif
+#ifdef BUILDFIXED
+    flags += 1 << 12;
+#endif
+#ifdef DYNAMIC_CRC_TABLE
+    flags += 1 << 13;
+#endif
+#ifdef NO_GZCOMPRESS
+    flags += 1L << 16;
+#endif
+#ifdef NO_GZIP
+    flags += 1L << 17;
+#endif
+#ifdef PKZIP_BUG_WORKAROUND
+    flags += 1L << 20;
+#endif
+#ifdef FASTEST
+    flags += 1L << 21;
+#endif
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#  ifdef NO_vsnprintf
+    flags += 1L << 25;
+#    ifdef HAS_vsprintf_void
+    flags += 1L << 26;
+#    endif
+#  else
+#    ifdef HAS_vsnprintf_void
+    flags += 1L << 26;
+#    endif
+#  endif
+#else
+    flags += 1L << 24;  /* bit 24: neither STDC nor Z_HAVE_STDARG_H defined */
+#  ifdef NO_snprintf
+    flags += 1L << 25;
+#    ifdef HAS_sprintf_void
+    flags += 1L << 26;
+#    endif
+#  else
+#    ifdef HAS_snprintf_void
+    flags += 1L << 26;
+#    endif
+#  endif
+#endif
+    return flags;
+}
+
+#ifdef ZLIB_DEBUG
+#include <stdlib.h>
+#  ifndef verbose
+#    define verbose 0
+#  endif
+int ZLIB_INTERNAL z_verbose = verbose;
+/* ZLIB_DEBUG builds only: print m to stderr, then exit with status 1. */
+void ZLIB_INTERNAL z_error (m)
+    char *m;
+{
+    fprintf(stderr, "%s\n", m);
+    exit(1);
+}
+#endif
+
+/* exported to allow conversion of error code to string for compress() and
+ * uncompress()
+ */
+const char * ZEXPORT zError(err)
+    int err;
+{
+    return ERR_MSG(err);    /* lookup in z_errmsg[] via the zutil.h macro */
+}
+
+#if defined(_WIN32_WCE)
+    /* The Microsoft C Run-Time Library for Windows CE doesn't have
+     * errno.  We define it as a global variable to simplify porting.
+     * Its value is always 0 and should not be used.
+     */
+    int errno = 0;
+#endif
+
+#ifndef HAVE_MEMCPY
+/* Byte-wise copy of len bytes from source to dest (no HAVE_MEMCPY). */
+void ZLIB_INTERNAL zmemcpy(dest, source, len)
+    Bytef* dest;
+    const Bytef* source;
+    uInt  len;
+{
+    if (len == 0) return;
+    do {
+        *dest++ = *source++; /* ??? to be unrolled */
+    } while (--len != 0);
+}
+/* Compare len bytes; return 0 if equal, else -1 or 1 at first difference. */
+int ZLIB_INTERNAL zmemcmp(s1, s2, len)
+    const Bytef* s1;
+    const Bytef* s2;
+    uInt  len;
+{
+    uInt j;
+
+    for (j = 0; j < len; j++) {
+        if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1;
+    }
+    return 0;
+}
+/* Set the len bytes starting at dest to zero, one byte at a time. */
+void ZLIB_INTERNAL zmemzero(dest, len)
+    Bytef* dest;
+    uInt  len;
+{
+    if (len == 0) return;
+    do {
+        *dest++ = 0;  /* ??? to be unrolled */
+    } while (--len != 0);
+}
+#endif
+
+#ifndef Z_SOLO
+
+#ifdef SYS16BIT
+
+#ifdef __TURBOC__
+/* Turbo C in 16-bit mode */
+
+#  define MY_ZCALLOC
+
+/* Turbo C malloc() does not allow dynamic allocation of 64K bytes
+ * and farmalloc(64K) returns a pointer with an offset of 8, so we
+ * must fix the pointer. Warning: the pointer must be put back to its
+ * original form in order to free it, use zcfree().
+ */
+
+#define MAX_PTR 10
+/* 10*64K = 640K */
+
+local int next_ptr = 0;
+
+typedef struct ptr_table_s {
+    voidpf org_ptr;
+    voidpf new_ptr;
+} ptr_table;
+
+local ptr_table table[MAX_PTR];
+/* This table is used to remember the original form of pointers
+ * to large buffers (64K). Such pointers are normalized with a zero offset.
+ * Since MSDOS is not a preemptive multitasking OS, this table is not
+ * protected from concurrent access. This hack doesn't work anyway on
+ * a protected system like OS/2. Use Microsoft C instead.
+ */
+/* Turbo C: allocate items*size bytes with farmalloc(); opaque is unused. */
+voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, unsigned items, unsigned size)
+{
+    voidpf buf;
+    ulg bsize = (ulg)items*size;
+
+    (void)opaque;
+
+    /* If we allocate less than 65520 bytes, we assume that farmalloc
+     * will return a usable pointer which doesn't have to be normalized.
+     */
+    if (bsize < 65520L) {
+        buf = farmalloc(bsize);
+        if (*(ush*)&buf != 0) return buf;
+    } else {
+        buf = farmalloc(bsize + 16L);
+    }
+    if (buf == NULL || next_ptr >= MAX_PTR) return NULL;
+    table[next_ptr].org_ptr = buf;
+
+    /* Normalize the pointer to seg:0 */
+    *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4;
+    *(ush*)&buf = 0;
+    table[next_ptr++].new_ptr = buf;
+    return buf;
+}
+/* Turbo C: free ptr, looking up the original pointer in table[] if needed. */
+void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr)
+{
+    int n;
+
+    (void)opaque;
+
+    if (*(ush*)&ptr != 0) { /* object < 64K */
+        farfree(ptr);
+        return;
+    }
+    /* Find the original pointer */
+    for (n = 0; n < next_ptr; n++) {
+        if (ptr != table[n].new_ptr) continue;
+
+        farfree(table[n].org_ptr);
+        while (++n < next_ptr) {    /* close the gap left by the freed entry */
+            table[n-1] = table[n];
+        }
+        next_ptr--;
+        return;
+    }
+    Assert(0, "zcfree: ptr not found");
+}
+
+#endif /* __TURBOC__ */
+
+
+#ifdef M_I86
+/* Microsoft C in 16-bit mode */
+
+#  define MY_ZCALLOC
+
+#if (!defined(_MSC_VER) || (_MSC_VER <= 600))
+#  define _halloc  halloc
+#  define _hfree   hfree
+#endif
+/* Allocate items elements of size bytes with _halloc(); opaque is unused. */
+voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, uInt items, uInt size)
+{
+    (void)opaque;
+    return _halloc((long)items, size);
+}
+/* Release ptr with _hfree(); opaque is unused. */
+void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr)
+{
+    (void)opaque;
+    _hfree(ptr);
+}
+
+#endif /* M_I86 */
+
+#endif /* SYS16BIT */
+
+
+#ifndef MY_ZCALLOC /* Any system without a special alloc function */
+
+#ifndef STDC
+extern voidp  malloc OF((uInt size));
+extern voidp  calloc OF((uInt items, uInt size));
+extern void   free   OF((voidpf ptr));
+#endif
+/* Default allocator.  NOTE(review): items * size is unsigned and may wrap. */
+voidpf ZLIB_INTERNAL zcalloc (opaque, items, size)
+    voidpf opaque;
+    unsigned items;
+    unsigned size;
+{
+    (void)opaque;
+    return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) :
+                              (voidpf)calloc(items, size);
+}
+/* Default deallocator: passes ptr to free(); opaque is unused. */
+void ZLIB_INTERNAL zcfree (opaque, ptr)
+    voidpf opaque;
+    voidpf ptr;
+{
+    (void)opaque;
+    free(ptr);
+}
+
+#endif /* MY_ZCALLOC */
+
+#endif /* !Z_SOLO */
diff --git a/libraries/zlib/zutil.h b/libraries/zlib/zutil.h
new file mode 100644
index 000000000..b079ea6a8
--- /dev/null
+++ b/libraries/zlib/zutil.h
@@ -0,0 +1,271 @@
+/* zutil.h -- internal interface and configuration of the compression library
+ * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* @(#) $Id$ */
+
+#ifndef ZUTIL_H
+#define ZUTIL_H
+
+#ifdef HAVE_HIDDEN
+#  define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
+#else
+#  define ZLIB_INTERNAL
+#endif
+
+#include "zlib.h"
+
+#if defined(STDC) && !defined(Z_SOLO)
+#  if !(defined(_WIN32_WCE) && defined(_MSC_VER))
+#    include <stddef.h>
+#  endif
+#  include <string.h>
+#  include <stdlib.h>
+#endif
+
+#ifdef Z_SOLO
+   typedef long ptrdiff_t;  /* guess -- will be caught if guess is wrong */
+#endif
+
+#ifndef local
+#  define local static
+#endif
+/* since "static" is used to mean two completely different things in C, we
+   define "local" for the non-static meaning of "static", for readability
+   (compile with -Dlocal if your debugger can't find static symbols) */
+
+typedef unsigned char  uch;
+typedef uch FAR uchf;
+typedef unsigned short ush;
+typedef ush FAR ushf;
+typedef unsigned long  ulg;
+
+extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
+/* (size given to avoid silly warnings with Visual C++) */
+/* ERR_MSG: codes outside -6..2 yield z_errmsg[9] (""); err is re-evaluated */
+#define ERR_MSG(err) z_errmsg[(err) < -6 || (err) > 2 ? 9 : 2 - (err)]
+
+#define ERR_RETURN(strm,err) \
+  return (strm->msg = ERR_MSG(err), (err))
+/* To be used only when the state is known to be valid */
+
+        /* common constants */
+
+#ifndef DEF_WBITS
+#  define DEF_WBITS MAX_WBITS
+#endif
+/* default windowBits for decompression. MAX_WBITS is for compression only */
+
+#if MAX_MEM_LEVEL >= 8
+#  define DEF_MEM_LEVEL 8
+#else
+#  define DEF_MEM_LEVEL  MAX_MEM_LEVEL
+#endif
+/* default memLevel */
+
+#define STORED_BLOCK 0
+#define STATIC_TREES 1
+#define DYN_TREES    2
+/* The three kinds of block type */
+
+#define MIN_MATCH  3
+#define MAX_MATCH  258
+/* The minimum and maximum match lengths */
+
+#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
+
+        /* target dependencies */
+
+#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32))
+#  define OS_CODE  0x00
+#  ifndef Z_SOLO
+#    if defined(__TURBOC__) || defined(__BORLANDC__)
+#      if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
+         /* Allow compilation with ANSI keywords only enabled */
+         void _Cdecl farfree( void *block );
+         void *_Cdecl farmalloc( unsigned long nbytes );
+#      else
+#        include <alloc.h>
+#      endif
+#    else /* MSC or DJGPP */
+#      include <malloc.h>
+#    endif
+#  endif
+#endif
+
+#ifdef AMIGA
+#  define OS_CODE  1
+#endif
+
+#if defined(VAXC) || defined(VMS)
+#  define OS_CODE  2
+#  define F_OPEN(name, mode) \
+     fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
+#endif
+
+#ifdef __370__
+#  if __TARGET_LIB__ < 0x20000000
+#    define OS_CODE 4
+#  elif __TARGET_LIB__ < 0x40000000
+#    define OS_CODE 11
+#  else
+#    define OS_CODE 8
+#  endif
+#endif
+
+#if defined(ATARI) || defined(atarist)
+#  define OS_CODE  5
+#endif
+
+#ifdef OS2
+#  define OS_CODE  6
+#  if defined(M_I86) && !defined(Z_SOLO)
+#    include <malloc.h>
+#  endif
+#endif
+
+#if defined(MACOS) || defined(TARGET_OS_MAC)
+#  define OS_CODE  7
+#  ifndef Z_SOLO
+#    if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
+#      include <unix.h> /* for fdopen */
+#    else
+#      ifndef fdopen
+#        define fdopen(fd,mode) NULL /* No fdopen() */
+#      endif
+#    endif
+#  endif
+#endif
+
+#ifdef __acorn
+#  define OS_CODE 13
+#endif
+
+#if defined(WIN32) && !defined(__CYGWIN__)
+#  define OS_CODE  10
+#endif
+
+#ifdef _BEOS_
+#  define OS_CODE  16
+#endif
+
+#ifdef __TOS_OS400__
+#  define OS_CODE 18
+#endif
+
+#ifdef __APPLE__
+#  define OS_CODE 19
+#endif
+
+#if defined(_BEOS_) || defined(RISCOS)
+#  define fdopen(fd,mode) NULL /* No fdopen() */
+#endif
+
+#if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX
+#  if defined(_WIN32_WCE)
+#    define fdopen(fd,mode) NULL /* No fdopen() */
+#    ifndef _PTRDIFF_T_DEFINED
+       typedef int ptrdiff_t;
+#      define _PTRDIFF_T_DEFINED
+#    endif
+#  else
+#    define fdopen(fd,type)  _fdopen(fd,type)
+#  endif
+#endif
+
+#if defined(__BORLANDC__) && !defined(MSDOS)
+  #pragma warn -8004
+  #pragma warn -8008
+  #pragma warn -8066
+#endif
+
+/* provide prototypes for these when building zlib without LFS */
+#if !defined(_WIN32) && \
+    (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0)
+    ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
+    ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
+#endif
+
+        /* common defaults */
+
+#ifndef OS_CODE
+#  define OS_CODE  3     /* assume Unix */
+#endif
+
+#ifndef F_OPEN
+#  define F_OPEN(name, mode) fopen((name), (mode))
+#endif
+
+         /* functions */
+
+#if defined(pyr) || defined(Z_SOLO)
+#  define NO_MEMCPY
+#endif
+#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
+ /* Use our own functions for small and medium model with MSC <= 5.0.
+  * You may have to use the same strategy for Borland C (untested).
+  * The __SC__ check is for Symantec.
+  */
+#  define NO_MEMCPY
+#endif
+#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
+#  define HAVE_MEMCPY
+#endif
+#ifdef HAVE_MEMCPY
+#  ifdef SMALL_MEDIUM /* MSDOS small or medium model */
+#    define zmemcpy _fmemcpy
+#    define zmemcmp _fmemcmp
+#    define zmemzero(dest, len) _fmemset(dest, 0, len)
+#  else
+#    define zmemcpy memcpy
+#    define zmemcmp memcmp
+#    define zmemzero(dest, len) memset(dest, 0, len)
+#  endif
+#else
+   void ZLIB_INTERNAL zmemcpy OF((Bytef* dest, const Bytef* source, uInt len));
+   int ZLIB_INTERNAL zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len));
+   void ZLIB_INTERNAL zmemzero OF((Bytef* dest, uInt len));
+#endif
+
+/* Diagnostic functions */
+#ifdef ZLIB_DEBUG
+#  include <stdio.h>
+   extern int ZLIB_INTERNAL z_verbose;
+   extern void ZLIB_INTERNAL z_error OF((char *m));
+#  define Assert(cond,msg) {if(!(cond)) z_error(msg);}
+#  define Trace(x) {if (z_verbose>=0) fprintf x ;}
+#  define Tracev(x) {if (z_verbose>0) fprintf x ;}
+#  define Tracevv(x) {if (z_verbose>1) fprintf x ;}
+#  define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;}
+#  define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;}
+#else
+#  define Assert(cond,msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c,x)
+#  define Tracecv(c,x)
+#endif
+
+#ifndef Z_SOLO
+   voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items,
+                                    unsigned size));
+   void ZLIB_INTERNAL zcfree  OF((voidpf opaque, voidpf ptr));
+#endif
+
+#define ZALLOC(strm, items, size) \
+           (*((strm)->zalloc))((strm)->opaque, (items), (size))
+#define ZFREE(strm, addr)  (*((strm)->zfree))((strm)->opaque, (voidpf)(addr))
+#define TRY_FREE(s, p) {if (p) ZFREE(s, p);}
+
+/* Reverse the bytes in a 32-bit value */
+#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
+                    (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
+
+#endif /* ZUTIL_H */